From e3e99ee62e8c2d78a02bed5ab8925e39a07dddfb Mon Sep 17 00:00:00 2001
From: Rain
Date: Mon, 20 Nov 2023 17:01:04 -0800
Subject: [PATCH 001/186] [nexus-db-model] separate out SledUpdate from Sled
 (#4533)

`Sled` consists of several columns that aren't controlled by sled-agent,
and we end up in this weird place where we have `Sled` instances that
don't reflect reality. I'm working on adding a `provision_state` column
which is controlled by the operator and which, again, sled-agent doesn't
know about.

Clean this up by defining a new struct, `SledUpdate`, which only
contains the columns sled-agent knows about. The other columns get
defaults when `into_insertable` is called.
---
 nexus/db-model/src/sled.rs                | 133 +++++++++++++-----
 nexus/db-queries/src/db/datastore/mod.rs  |  12 +-
 .../src/db/datastore/physical_disk.rs     |   6 +-
 nexus/db-queries/src/db/datastore/rack.rs |   6 +-
 nexus/db-queries/src/db/datastore/sled.rs |  62 ++++----
 nexus/src/app/sled.rs                     |   2 +-
 6 files changed, 150 insertions(+), 71 deletions(-)

diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs
index 5e059946ff..ba572901c6 100644
--- a/nexus/db-model/src/sled.rs
+++ b/nexus/db-model/src/sled.rs
@@ -62,38 +62,6 @@ pub struct Sled {
 }
 
 impl Sled {
-    pub fn new(
-        id: Uuid,
-        addr: SocketAddrV6,
-        baseboard: SledBaseboard,
-        hardware: SledSystemHardware,
-        rack_id: Uuid,
-    ) -> Self {
-        let last_used_address = {
-            let mut segments = addr.ip().segments();
-            segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES;
-            ipv6::Ipv6Addr::from(Ipv6Addr::from(segments))
-        };
-        Self {
-            identity: SledIdentity::new(id),
-            time_deleted: None,
-            rcgen: Generation::new(),
-            rack_id,
-            is_scrimlet: hardware.is_scrimlet,
-            serial_number: baseboard.serial_number,
-            part_number: baseboard.part_number,
-            revision: baseboard.revision,
-            usable_hardware_threads: SqlU32::new(
-                hardware.usable_hardware_threads,
-            ),
-            usable_physical_ram: hardware.usable_physical_ram,
-            reservoir_size: hardware.reservoir_size,
-            ip: ipv6::Ipv6Addr::from(addr.ip()),
-            port: addr.port().into(),
-            last_used_address,
-        }
-    }
-
     pub fn is_scrimlet(&self) -> bool {
         self.is_scrimlet
     }
@@ -153,6 +121,107 @@ impl DatastoreCollectionConfig for Sled {
     type CollectionIdColumn = service::dsl::sled_id;
 }
 
+/// Form of `Sled` used for updates from sled-agent. This is missing some
+/// columns that are present in `Sled` because sled-agent doesn't control them.
+#[derive(Debug, Clone)]
+pub struct SledUpdate {
+    id: Uuid,
+
+    pub rack_id: Uuid,
+
+    is_scrimlet: bool,
+    serial_number: String,
+    part_number: String,
+    revision: i64,
+
+    pub usable_hardware_threads: SqlU32,
+    pub usable_physical_ram: ByteCount,
+    pub reservoir_size: ByteCount,
+
+    // ServiceAddress (Sled Agent).
+    pub ip: ipv6::Ipv6Addr,
+    pub port: SqlU16,
+}
+
+impl SledUpdate {
+    pub fn new(
+        id: Uuid,
+        addr: SocketAddrV6,
+        baseboard: SledBaseboard,
+        hardware: SledSystemHardware,
+        rack_id: Uuid,
+    ) -> Self {
+        Self {
+            id,
+            rack_id,
+            is_scrimlet: hardware.is_scrimlet,
+            serial_number: baseboard.serial_number,
+            part_number: baseboard.part_number,
+            revision: baseboard.revision,
+            usable_hardware_threads: SqlU32::new(
+                hardware.usable_hardware_threads,
+            ),
+            usable_physical_ram: hardware.usable_physical_ram,
+            reservoir_size: hardware.reservoir_size,
+            ip: addr.ip().into(),
+            port: addr.port().into(),
+        }
+    }
+
+    /// Converts self into a form used for inserts of new sleds into the
+    /// database.
+    ///
+    /// This form adds default values for fields that are not present in
+    /// `SledUpdate`.
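+    ///
+    /// A minimal usage sketch (illustrative only; `id`, `addr`,
+    /// `baseboard`, `hardware`, and `rack_id` are placeholders here, not
+    /// values defined by this change):
+    ///
+    /// ```ignore
+    /// let sled: Sled =
+    ///     SledUpdate::new(id, addr, baseboard, hardware, rack_id)
+    ///         .into_insertable();
+    /// // `sled` now carries defaults for the columns sled-agent doesn't
+    /// // control (identity, rcgen, time_deleted, last_used_address).
+    /// ```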
+ pub fn into_insertable(self) -> Sled { + let last_used_address = { + let mut segments = self.ip().segments(); + segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; + ipv6::Ipv6Addr::from(Ipv6Addr::from(segments)) + }; + Sled { + identity: SledIdentity::new(self.id), + rcgen: Generation::new(), + time_deleted: None, + rack_id: self.rack_id, + is_scrimlet: self.is_scrimlet, + serial_number: self.serial_number, + part_number: self.part_number, + revision: self.revision, + usable_hardware_threads: self.usable_hardware_threads, + usable_physical_ram: self.usable_physical_ram, + reservoir_size: self.reservoir_size, + ip: self.ip, + port: self.port, + last_used_address, + } + } + + pub fn id(&self) -> Uuid { + self.id + } + + pub fn is_scrimlet(&self) -> bool { + self.is_scrimlet + } + + pub fn ip(&self) -> Ipv6Addr { + self.ip.into() + } + + pub fn address(&self) -> SocketAddrV6 { + self.address_with_port(self.port.into()) + } + + pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { + SocketAddrV6::new(self.ip(), port, 0, 0) + } + + pub fn serial_number(&self) -> &str { + &self.serial_number + } +} + /// A set of constraints that can be placed on operations that select a sled. #[derive(Debug)] pub struct SledReservationConstraints { diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 8be3386183..0612b960c9 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -371,8 +371,8 @@ mod test { use crate::db::model::{ BlockSize, ComponentUpdate, ComponentUpdateIdentity, ConsoleSession, Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, - Project, Rack, Region, Service, ServiceKind, SiloUser, Sled, - SledBaseboard, SledSystemHardware, SshKey, SystemUpdate, + Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard, + SledSystemHardware, SledUpdate, SshKey, SystemUpdate, UpdateableComponentType, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; @@ -599,14 +599,14 @@ mod test { let rack_id = Uuid::new_v4(); let sled_id = Uuid::new_v4(); - let sled = Sled::new( + let sled_update = SledUpdate::new( sled_id, bogus_addr, sled_baseboard_for_test(), sled_system_hardware_for_test(), rack_id, ); - datastore.sled_upsert(sled).await.unwrap(); + datastore.sled_upsert(sled_update).await.unwrap(); sled_id } @@ -1205,7 +1205,7 @@ mod test { let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); - let sled1 = db::model::Sled::new( + let sled1 = db::model::SledUpdate::new( sled1_id, addr1, sled_baseboard_for_test(), @@ -1216,7 +1216,7 @@ mod test { let addr2 = "[fd00:1df::1]:12345".parse().unwrap(); let sled2_id = "66285c18-0c79-43e0-e54f-95271f271314".parse().unwrap(); - let sled2 = db::model::Sled::new( + let sled2 = db::model::SledUpdate::new( sled2_id, addr2, sled_baseboard_for_test(), diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 3c83b91d21..ecb583ee29 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -141,7 +141,7 @@ mod test { use crate::db::datastore::test::{ sled_baseboard_for_test, sled_system_hardware_for_test, }; - use crate::db::model::{PhysicalDiskKind, Sled}; + use crate::db::model::{PhysicalDiskKind, Sled, SledUpdate}; use dropshot::PaginationOrder; use 
nexus_test_utils::db::test_setup_database; use nexus_types::identity::Asset; @@ -153,14 +153,14 @@ mod test { let sled_id = Uuid::new_v4(); let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); let rack_id = Uuid::new_v4(); - let sled = Sled::new( + let sled_update = SledUpdate::new( sled_id, addr, sled_baseboard_for_test(), sled_system_hardware_for_test(), rack_id, ); - db.sled_upsert(sled) + db.sled_upsert(sled_update) .await .expect("Could not upsert sled during test prep") } diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index ae982d86f8..2cc5880470 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -680,7 +680,7 @@ mod test { use crate::db::model::Sled; use async_bb8_diesel::AsyncSimpleConnection; use internal_params::DnsRecord; - use nexus_db_model::{DnsGroup, InitialDnsGroup}; + use nexus_db_model::{DnsGroup, InitialDnsGroup, SledUpdate}; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::identity::Asset; @@ -870,14 +870,14 @@ mod test { async fn create_test_sled(db: &DataStore) -> Sled { let sled_id = Uuid::new_v4(); let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); - let sled = Sled::new( + let sled_update = SledUpdate::new( sled_id, addr, sled_baseboard_for_test(), sled_system_hardware_for_test(), rack_id(), ); - db.sled_upsert(sled) + db.sled_upsert(sled_update) .await .expect("Could not upsert sled during test prep") } diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index f4f5188057..130c36b496 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -11,9 +11,9 @@ use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; -use crate::db::identity::Asset; use crate::db::model::Sled; use crate::db::model::SledResource; +use crate::db::model::SledUpdate; use crate::db::pagination::paginated; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; @@ -29,21 +29,25 @@ use uuid::Uuid; impl DataStore { /// Stores a new sled in the database. - pub async fn sled_upsert(&self, sled: Sled) -> CreateResult { + pub async fn sled_upsert( + &self, + sled_update: SledUpdate, + ) -> CreateResult { use db::schema::sled::dsl; diesel::insert_into(dsl::sled) - .values(sled.clone()) + .values(sled_update.clone().into_insertable()) .on_conflict(dsl::id) .do_update() .set(( dsl::time_modified.eq(Utc::now()), - dsl::ip.eq(sled.ip), - dsl::port.eq(sled.port), - dsl::rack_id.eq(sled.rack_id), - dsl::is_scrimlet.eq(sled.is_scrimlet()), - dsl::usable_hardware_threads.eq(sled.usable_hardware_threads), - dsl::usable_physical_ram.eq(sled.usable_physical_ram), - dsl::reservoir_size.eq(sled.reservoir_size), + dsl::ip.eq(sled_update.ip), + dsl::port.eq(sled_update.port), + dsl::rack_id.eq(sled_update.rack_id), + dsl::is_scrimlet.eq(sled_update.is_scrimlet()), + dsl::usable_hardware_threads + .eq(sled_update.usable_hardware_threads), + dsl::usable_physical_ram.eq(sled_update.usable_physical_ram), + dsl::reservoir_size.eq(sled_update.reservoir_size), )) .returning(Sled::as_returning()) .get_result_async(&*self.pool_connection_unauthorized().await?) 
@@ -53,7 +57,7 @@ impl DataStore { e, ErrorHandler::Conflict( ResourceType::Sled, - &sled.id().to_string(), + &sled_update.id().to_string(), ), ) }) @@ -241,7 +245,7 @@ mod test { let sled_id = Uuid::new_v4(); let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); - let mut sled = Sled::new( + let mut sled_update = SledUpdate::new( sled_id, addr, sled_baseboard_for_test(), @@ -249,44 +253,50 @@ mod test { rack_id(), ); let observed_sled = datastore - .sled_upsert(sled.clone()) + .sled_upsert(sled_update.clone()) .await .expect("Could not upsert sled during test prep"); assert_eq!( observed_sled.usable_hardware_threads, - sled.usable_hardware_threads + sled_update.usable_hardware_threads + ); + assert_eq!( + observed_sled.usable_physical_ram, + sled_update.usable_physical_ram ); - assert_eq!(observed_sled.usable_physical_ram, sled.usable_physical_ram); - assert_eq!(observed_sled.reservoir_size, sled.reservoir_size); + assert_eq!(observed_sled.reservoir_size, sled_update.reservoir_size); // Modify the sizes of hardware - sled.usable_hardware_threads = - SqlU32::new(sled.usable_hardware_threads.0 + 1); + sled_update.usable_hardware_threads = + SqlU32::new(sled_update.usable_hardware_threads.0 + 1); const MIB: u64 = 1024 * 1024; - sled.usable_physical_ram = ByteCount::from( + sled_update.usable_physical_ram = ByteCount::from( external::ByteCount::try_from( - sled.usable_physical_ram.0.to_bytes() + MIB, + sled_update.usable_physical_ram.0.to_bytes() + MIB, ) .unwrap(), ); - sled.reservoir_size = ByteCount::from( + sled_update.reservoir_size = ByteCount::from( external::ByteCount::try_from( - sled.reservoir_size.0.to_bytes() + MIB, + sled_update.reservoir_size.0.to_bytes() + MIB, ) .unwrap(), ); // Test that upserting the sled propagates those changes to the DB. 
let observed_sled = datastore - .sled_upsert(sled.clone()) + .sled_upsert(sled_update.clone()) .await .expect("Could not upsert sled during test prep"); assert_eq!( observed_sled.usable_hardware_threads, - sled.usable_hardware_threads + sled_update.usable_hardware_threads + ); + assert_eq!( + observed_sled.usable_physical_ram, + sled_update.usable_physical_ram ); - assert_eq!(observed_sled.usable_physical_ram, sled.usable_physical_ram); - assert_eq!(observed_sled.reservoir_size, sled.reservoir_size); + assert_eq!(observed_sled.reservoir_size, sled_update.reservoir_size); db.cleanup().await.unwrap(); logctx.cleanup_successful(); diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index da89e7e25a..8189c0a93d 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -51,7 +51,7 @@ impl super::Nexus { SledRole::Scrimlet => true, }; - let sled = db::model::Sled::new( + let sled = db::model::SledUpdate::new( id, info.sa_address, db::model::SledBaseboard { From 39512b7966eed7032d584302c94e507776bacc5d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 05:24:41 +0000 Subject: [PATCH 002/186] Update taiki-e/install-action digest to 8f354f3 (#4537) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`ccc14bd` -> `8f354f3`](https://togithub.com/taiki-e/install-action/compare/ccc14bd...8f354f3) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index cc67b91fce..d79c836fba 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@ccc14bdc8d34cddf54e4f9fb2da0c208427207a3 # v2 + uses: taiki-e/install-action@8f354f35e51028c902e8ab954045e37739acf562 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From aee9602d50d96d168a3308a661a3ad2b5c5c64c2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 20 Nov 2023 22:15:41 -0800 Subject: [PATCH 003/186] Update Rust crate fs-err to 2.11.0 (#4538) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c8cfe908c1..7c30892c8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2291,9 +2291,9 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] name = "fs-err" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5fd9bcbe8b1087cbd395b51498c01bc997cef73e778a80b77a811af5e2d29f" +checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" dependencies = [ "autocfg", ] diff --git a/Cargo.toml b/Cargo.toml index b18b20aec7..7aa6482bf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -193,7 +193,7 @@ filetime = "0.2.22" flate2 = "1.0.28" flume = "0.11.0" foreign-types = "0.3.2" -fs-err = "2.10.0" +fs-err = "2.11.0" futures = "0.3.29" gateway-client = { path = "clients/gateway-client" } gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", default-features = false, features = ["std"] } From 74120386aca7de0f9f6d41428cc21e543d795e37 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 06:51:31 +0000 Subject: [PATCH 004/186] Update Rust crate rpassword to 7.3.1 (#4539) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7c30892c8c..82dd13e0ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6646,13 +6646,13 @@ dependencies = [ [[package]] name = "rpassword" -version = "7.2.0" +version = "7.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6678cf63ab3491898c0d021b493c94c9b221d91295294a2a5746eacbe5928322" +checksum = "80472be3c897911d0137b2d2b9055faf6eeac5b14e324073d83bc17b191d7e3f" dependencies = [ "libc", "rtoolbox", - "winapi", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 7aa6482bf2..0d0bf07abf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -304,7 +304,7 @@ regex = "1.10.2" regress = "0.7.1" reqwest = { version = "0.11", default-features = false } ring = "0.16" -rpassword = "7.2.0" +rpassword = "7.3.1" rstest = "0.18.2" rustfmt-wrapper = "0.2" rustls = "0.21.9" From 837f646a0f04d821af46384608a346487549e5d3 Mon Sep 17 00:00:00 2001 From: 
"oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 00:53:06 -0800 Subject: [PATCH 005/186] Update Rust crate tokio to 1.34.0 (#4540) --- Cargo.lock | 24 ++++++++++++------------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 20 ++++++++++---------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 82dd13e0ab..cc2daa0e01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2501,7 +2501,7 @@ dependencies = [ "serde", "serde-big-array 0.5.1", "slog", - "socket2 0.5.4", + "socket2 0.5.5", "string_cache", "thiserror", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git?branch=main)", @@ -3354,7 +3354,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" dependencies = [ - "socket2 0.5.4", + "socket2 0.5.5", "widestring", "windows-sys 0.48.0", "winreg", @@ -3835,9 +3835,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" dependencies = [ "libc", "log", @@ -7778,9 +7778,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", "windows-sys 0.48.0", @@ -8435,9 +8435,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.33.0" +version = "1.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" +checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" dependencies = [ "backtrace", "bytes", @@ -8447,16 +8447,16 @@ dependencies = [ "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.4", + "socket2 0.5.5", "tokio-macros", "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", @@ -8493,7 +8493,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.8.5", - "socket2 0.5.4", + "socket2 0.5.5", "tokio", "tokio-util", "whoami", diff --git a/Cargo.toml b/Cargo.toml index 0d0bf07abf..881bbf1cee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -363,7 +363,7 @@ textwrap = "0.16.0" test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.33.0" +tokio = "1.34.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" tokio-tungstenite = "0.18" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 47ea83f8f2..7aad62ee38 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -92,7 +92,7 @@ subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } syn-f595c2ba2a3f28df = { package 
= "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.33.0", features = ["full", "test-util"] } +tokio = { version = "1.34.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } @@ -188,7 +188,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.33.0", features = ["full", "test-util"] } +tokio = { version = "1.34.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } @@ -207,49 +207,49 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla [target.x86_64-unknown-linux-gnu.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell 
= { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } @@ -258,7 +258,7 @@ toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", featu [target.x86_64-unknown-illumos.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } -mio = { version = "0.8.8", features = ["net", "os-ext"] } +mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } rustix = { version = "0.38.9", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } From 828021fc023460a7be9ad628ce5ff672b672e461 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 00:53:40 -0800 Subject: [PATCH 006/186] Update Rust crate uuid to 1.6.1 (#4541) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cc2daa0e01..b324f4919b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9206,9 +9206,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.5.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" +checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" dependencies = [ "getrandom 0.2.10", "serde", diff --git a/Cargo.toml b/Cargo.toml index 881bbf1cee..fb220ba53d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -382,7 +382,7 @@ tufaceous-lib = { path = "tufaceous-lib" } unicode-width = "0.1.11" update-engine = { path = "update-engine" } usdt = "0.3" -uuid = { version = "1.5.0", features = ["serde", "v4"] } +uuid = { version = "1.6.1", features = ["serde", "v4"] } walkdir = "2.4" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 7aad62ee38..1a289bd0cb 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -103,7 +103,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.5.0", features = ["serde", "v4"] } +uuid = { version = "1.6.1", features = ["serde", "v4"] 
} yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zeroize = { version = "1.6.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } @@ -199,7 +199,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.5.0", features = ["serde", "v4"] } +uuid = { version = "1.6.1", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zeroize = { version = "1.6.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } From 745eac2d0dba169824c11d57be681f1ec4f2ccf4 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Tue, 21 Nov 2023 07:57:49 -0800 Subject: [PATCH 007/186] Fix #4509 and #4512 (#4528) --- nexus/db-model/src/schema.rs | 5 + .../src/db/datastore/switch_port.rs | 130 +++- nexus/src/app/sagas/mod.rs | 1 + .../app/sagas/switch_port_settings_apply.rs | 646 ++---------------- .../app/sagas/switch_port_settings_clear.rs | 51 +- .../app/sagas/switch_port_settings_common.rs | 577 ++++++++++++++++ nexus/tests/integration_tests/switch_port.rs | 15 + 7 files changed, 827 insertions(+), 598 deletions(-) create mode 100644 nexus/src/app/sagas/switch_port_settings_common.rs diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index e7d625e854..960b53873a 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1329,3 +1329,8 @@ allow_tables_to_appear_in_same_query!( switch_port, switch_port_settings_route_config ); + +allow_tables_to_appear_in_same_query!( + switch_port, + switch_port_settings_bgp_peer_config +); diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index f301750ee9..d7319347f0 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -23,8 +23,8 @@ use crate::db::pagination::paginated; use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; use diesel::result::Error as DieselError; use diesel::{ - ExpressionMethods, JoinOnDsl, NullableExpressionMethods, QueryDsl, - SelectableHelper, + CombineDsl, ExpressionMethods, JoinOnDsl, NullableExpressionMethods, + QueryDsl, SelectableHelper, }; use nexus_types::external_api::params; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -1110,6 +1110,7 @@ impl DataStore { ) -> ListResultVec { use db::schema::{ switch_port::dsl as switch_port_dsl, + switch_port_settings_bgp_peer_config::dsl as bgp_peer_config_dsl, switch_port_settings_route_config::dsl as route_config_dsl, }; @@ -1126,6 +1127,18 @@ impl DataStore { // pagination in the future, or maybe a way to constrain the query to // a rack? 
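+            // The union below also picks up ports whose uplink is defined
+            // only via a BGP peer config (with no static routes), so those
+            // are reported as uplink ports as well.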
.limit(64) + .union( + switch_port_dsl::switch_port + .filter(switch_port_dsl::port_settings_id.is_not_null()) + .inner_join( + bgp_peer_config_dsl::switch_port_settings_bgp_peer_config + .on(switch_port_dsl::port_settings_id + .eq(bgp_peer_config_dsl::port_settings_id.nullable()), + ), + ) + .select(SwitchPort::as_select()) + .limit(64), + ) .load_async::( &*self.pool_connection_authorized(opctx).await?, ) @@ -1133,3 +1146,116 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } } + +#[cfg(test)] +mod test { + use crate::db::datastore::{datastore_test, UpdatePrecondition}; + use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::params::{ + BgpAnnounceSetCreate, BgpConfigCreate, BgpPeerConfig, SwitchPortConfig, + SwitchPortGeometry, SwitchPortSettingsCreate, + }; + use omicron_common::api::external::{ + IdentityMetadataCreateParams, Name, NameOrId, + }; + use omicron_test_utils::dev; + use std::collections::HashMap; + use uuid::Uuid; + + #[tokio::test] + async fn test_bgp_boundary_switches() { + let logctx = dev::test_setup_log("test_bgp_boundary_switches"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id: Uuid = + nexus_test_utils::RACK_UUID.parse().expect("parse uuid"); + let switch0: Name = "switch0".parse().expect("parse switch location"); + let qsfp0: Name = "qsfp0".parse().expect("parse qsfp0"); + + let port_result = datastore + .switch_port_create(&opctx, rack_id, switch0.into(), qsfp0.into()) + .await + .expect("switch port create"); + + let announce_set = BgpAnnounceSetCreate { + identity: IdentityMetadataCreateParams { + name: "test-announce-set".parse().unwrap(), + description: "test bgp announce set".into(), + }, + announcement: Vec::new(), + }; + + datastore.bgp_create_announce_set(&opctx, &announce_set).await.unwrap(); + + let bgp_config = BgpConfigCreate { + identity: IdentityMetadataCreateParams { + name: "test-bgp-config".parse().unwrap(), + description: "test bgp config".into(), + }, + asn: 47, + bgp_announce_set_id: NameOrId::Name( + "test-announce-set".parse().unwrap(), + ), + vrf: None, + }; + + datastore.bgp_config_set(&opctx, &bgp_config).await.unwrap(); + + let settings = SwitchPortSettingsCreate { + identity: IdentityMetadataCreateParams { + name: "test-settings".parse().unwrap(), + description: "test settings".into(), + }, + port_config: SwitchPortConfig { + geometry: SwitchPortGeometry::Qsfp28x1, + }, + groups: Vec::new(), + links: HashMap::new(), + interfaces: HashMap::new(), + routes: HashMap::new(), + bgp_peers: HashMap::from([( + "phy0".into(), + BgpPeerConfig { + bgp_announce_set: NameOrId::Name( + "test-announce-set".parse().unwrap(), + ), + bgp_config: NameOrId::Name( + "test-bgp-config".parse().unwrap(), + ), + interface_name: "qsfp0".into(), + addr: "192.168.1.1".parse().unwrap(), + hold_time: 0, + idle_hold_time: 0, + delay_open: 0, + connect_retry: 0, + keepalive: 0, + }, + )]), + addresses: HashMap::new(), + }; + + let settings_result = datastore + .switch_port_settings_create(&opctx, &settings, None) + .await + .unwrap(); + + datastore + .switch_port_set_settings_id( + &opctx, + port_result.id, + Some(settings_result.settings.identity.id), + UpdatePrecondition::DontCare, + ) + .await + .unwrap(); + + let uplink_ports = + datastore.switch_ports_with_uplinks(&opctx).await.unwrap(); + + assert_eq!(uplink_ports.len(), 1); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git 
a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 5b1843be3d..89e1a10052 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -36,6 +36,7 @@ pub mod snapshot_create; pub mod snapshot_delete; pub mod switch_port_settings_apply; pub mod switch_port_settings_clear; +pub mod switch_port_settings_common; pub mod test_saga; pub mod volume_delete; pub mod volume_remove_rop; diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 0c06d6ff83..aba62b6937 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -3,53 +3,32 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{NexusActionContext, NEXUS_DPD_TAG}; -use crate::app::map_switch_zone_addrs; use crate::app::sagas::retry_until_known_result; +use crate::app::sagas::switch_port_settings_common::{ + api_to_dpd_port_settings, ensure_switch_port_bgp_settings, + ensure_switch_port_uplink, select_mg_client, switch_sled_agent, + write_bootstore_config, +}; use crate::app::sagas::{ declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError, }; -use crate::Nexus; use anyhow::Error; use db::datastore::SwitchPortSettingsCombinedResult; -use dpd_client::types::{ - LinkCreate, LinkId, LinkSettings, PortFec, PortId, PortSettings, PortSpeed, - RouteSettingsV4, RouteSettingsV6, -}; -use dpd_client::{Ipv4Cidr, Ipv6Cidr}; -use internal_dns::ServiceName; -use ipnetwork::IpNetwork; -use mg_admin_client::types::Prefix4; -use mg_admin_client::types::{ApplyRequest, BgpPeerConfig}; -use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed, NETWORK_KEY}; -use nexus_db_queries::context::OpContext; +use dpd_client::types::PortId; +use nexus_db_model::NETWORK_KEY; use nexus_db_queries::db::datastore::UpdatePrecondition; use nexus_db_queries::{authn, db}; -use nexus_types::external_api::params; -use omicron_common::address::SLED_AGENT_PORT; use omicron_common::api::external::{self, NameOrId}; use omicron_common::api::internal::shared::{ ParseSwitchLocationError, SwitchLocation, }; use serde::{Deserialize, Serialize}; -use sled_agent_client::types::PortConfigV1; -use sled_agent_client::types::RouteConfig; -use sled_agent_client::types::{BgpConfig, EarlyNetworkConfig}; -use sled_agent_client::types::{ - BgpPeerConfig as OmicronBgpPeerConfig, HostPortConfig, -}; -use std::collections::HashMap; -use std::net::SocketAddrV6; -use std::net::{IpAddr, Ipv6Addr}; +use std::net::IpAddr; use std::str::FromStr; use std::sync::Arc; use steno::ActionError; use uuid::Uuid; -// This is more of an implementation detail of the BGP implementation. It -// defines the maximum time the peering engine will wait for external messages -// before breaking to check for shutdown conditions. 
-const BGP_SESSION_RESOLUTION: u64 = 100; - // switch port settings apply saga: input parameters #[derive(Debug, Deserialize, Serialize)] @@ -176,91 +155,6 @@ async fn spa_get_switch_port_settings( Ok(port_settings) } -pub(crate) fn api_to_dpd_port_settings( - settings: &SwitchPortSettingsCombinedResult, -) -> Result { - let mut dpd_port_settings = PortSettings { - links: HashMap::new(), - v4_routes: HashMap::new(), - v6_routes: HashMap::new(), - }; - - //TODO breakouts - let link_id = LinkId(0); - - for l in settings.links.iter() { - dpd_port_settings.links.insert( - link_id.to_string(), - LinkSettings { - params: LinkCreate { - autoneg: false, - lane: Some(LinkId(0)), - kr: false, - fec: match l.fec { - SwitchLinkFec::Firecode => PortFec::Firecode, - SwitchLinkFec::Rs => PortFec::Rs, - SwitchLinkFec::None => PortFec::None, - }, - speed: match l.speed { - SwitchLinkSpeed::Speed0G => PortSpeed::Speed0G, - SwitchLinkSpeed::Speed1G => PortSpeed::Speed1G, - SwitchLinkSpeed::Speed10G => PortSpeed::Speed10G, - SwitchLinkSpeed::Speed25G => PortSpeed::Speed25G, - SwitchLinkSpeed::Speed40G => PortSpeed::Speed40G, - SwitchLinkSpeed::Speed50G => PortSpeed::Speed50G, - SwitchLinkSpeed::Speed100G => PortSpeed::Speed100G, - SwitchLinkSpeed::Speed200G => PortSpeed::Speed200G, - SwitchLinkSpeed::Speed400G => PortSpeed::Speed400G, - }, - }, - //TODO won't work for breakouts - addrs: settings - .addresses - .iter() - .map(|a| a.address.ip()) - .collect(), - }, - ); - } - - for r in &settings.routes { - match &r.dst { - IpNetwork::V4(n) => { - let gw = match r.gw.ip() { - IpAddr::V4(gw) => gw, - IpAddr::V6(_) => { - return Err( - "IPv4 destination cannot have IPv6 nexthop".into() - ) - } - }; - dpd_port_settings.v4_routes.insert( - Ipv4Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop: gw }], - ); - } - IpNetwork::V6(n) => { - let gw = match r.gw.ip() { - IpAddr::V6(gw) => gw, - IpAddr::V4(_) => { - return Err( - "IPv6 destination cannot have IPv4 nexthop".into() - ) - } - }; - dpd_port_settings.v6_routes.insert( - Ipv6Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV6 { link_id: link_id.0, nexthop: gw }], - ); - } - } - } - - Ok(dpd_port_settings) -} - async fn spa_ensure_switch_port_settings( sagactx: NexusActionContext, ) -> Result<(), ActionError> { @@ -380,101 +274,6 @@ async fn spa_undo_ensure_switch_port_settings( Ok(()) } -async fn spa_ensure_switch_port_bgp_settings( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let settings = sagactx - .lookup::("switch_port_settings") - .map_err(|e| { - ActionError::action_failed(format!( - "lookup switch port settings: {e}" - )) - })?; - - ensure_switch_port_bgp_settings(sagactx, settings).await -} - -pub(crate) async fn ensure_switch_port_bgp_settings( - sagactx: NexusActionContext, - settings: SwitchPortSettingsCombinedResult, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - let params = sagactx.saga_params::()?; - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let mg_client: Arc = - select_mg_client(&sagactx).await.map_err(|e| { - ActionError::action_failed(format!("select mg client: {e}")) - })?; - - let mut bgp_peer_configs = Vec::new(); - - for peer in settings.bgp_peers { - let config = nexus - .bgp_config_get(&opctx, peer.bgp_config_id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!("get bgp config: 
{e}")) - })?; - - let announcements = nexus - .bgp_announce_list( - &opctx, - ¶ms::BgpAnnounceSetSelector { - name_or_id: NameOrId::Id(config.bgp_announce_set_id), - }, - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get bgp announcements: {e}" - )) - })?; - - let mut prefixes = Vec::new(); - for a in &announcements { - let value = match a.network.ip() { - IpAddr::V4(value) => Ok(value), - IpAddr::V6(_) => Err(ActionError::action_failed( - "IPv6 announcement not yet supported".to_string(), - )), - }?; - prefixes.push(Prefix4 { value, length: a.network.prefix() }); - } - - let bpc = BgpPeerConfig { - asn: *config.asn, - name: format!("{}", peer.addr.ip()), //TODO user defined name? - host: format!("{}:179", peer.addr.ip()), - hold_time: peer.hold_time.0.into(), - idle_hold_time: peer.idle_hold_time.0.into(), - delay_open: peer.delay_open.0.into(), - connect_retry: peer.connect_retry.0.into(), - keepalive: peer.keepalive.0.into(), - resolution: BGP_SESSION_RESOLUTION, - originate: prefixes, - }; - - bgp_peer_configs.push(bpc); - } - - mg_client - .inner - .bgp_apply(&ApplyRequest { - peer_group: params.switch_port_name.clone(), - peers: bgp_peer_configs, - }) - .await - .map_err(|e| { - ActionError::action_failed(format!("apply bgp settings: {e}")) - })?; - - Ok(()) -} async fn spa_undo_ensure_switch_port_bgp_settings( sagactx: NexusActionContext, ) -> Result<(), Error> { @@ -497,9 +296,13 @@ async fn spa_undo_ensure_switch_port_bgp_settings( })?; let mg_client: Arc = - select_mg_client(&sagactx).await.map_err(|e| { - ActionError::action_failed(format!("select mg client (undo): {e}")) - })?; + select_mg_client(&sagactx, &opctx, params.switch_port_id) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "select mg client (undo): {e}" + )) + })?; for peer in settings.bgp_peers { let config = nexus @@ -592,96 +395,39 @@ async fn spa_undo_ensure_switch_port_bootstore_network_settings( async fn spa_ensure_switch_port_uplink( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - ensure_switch_port_uplink(sagactx, false, None).await + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + ensure_switch_port_uplink( + sagactx, + &opctx, + false, + None, + params.switch_port_id, + params.switch_port_name, + ) + .await } async fn spa_undo_ensure_switch_port_uplink( sagactx: NexusActionContext, ) -> Result<(), Error> { - Ok(ensure_switch_port_uplink(sagactx, true, None).await?) -} - -pub(crate) async fn ensure_switch_port_uplink( - sagactx: NexusActionContext, - skip_self: bool, - inject: Option, -) -> Result<(), ActionError> { let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - - let switch_port = nexus - .get_switch_port(&opctx, params.switch_port_id) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get switch port for uplink: {e}" - )) - })?; - - let switch_location: SwitchLocation = - switch_port.switch_location.parse().map_err(|e| { - ActionError::action_failed(format!( - "get switch location for uplink: {e:?}", - )) - })?; - - let mut uplinks: Vec = Vec::new(); - - // The sled agent uplinks interface is an all or nothing interface, so we - // need to get all the uplink configs for all the ports. 
- let active_ports = - nexus.active_port_settings(&opctx).await.map_err(|e| { - ActionError::action_failed(format!( - "get active switch port settings: {e}" - )) - })?; - - for (port, info) in &active_ports { - // Since we are undoing establishing uplinks for the settings - // associated with this port we skip adding this ports uplinks - // to the list - effectively removing them. - if skip_self && port.id == switch_port.id { - continue; - } - uplinks.push(HostPortConfig { - port: port.port_name.clone(), - addrs: info.addresses.iter().map(|a| a.address).collect(), - }) - } - - if let Some(id) = inject { - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let settings = nexus - .switch_port_settings_get(&opctx, &id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get switch port settings for injection: {e}" - )) - })?; - uplinks.push(HostPortConfig { - port: params.switch_port_name.clone(), - addrs: settings.addresses.iter().map(|a| a.address).collect(), - }) - } - - let sc = switch_sled_agent(switch_location, &sagactx).await?; - sc.uplink_ensure(&sled_agent_client::types::SwitchPorts { uplinks }) - .await - .map_err(|e| { - ActionError::action_failed(format!("ensure uplink: {e}")) - })?; - - Ok(()) + Ok(ensure_switch_port_uplink( + sagactx, + &opctx, + true, + None, + params.switch_port_id, + params.switch_port_name, + ) + .await?) } // a common route representation for dendrite and port settings @@ -767,307 +513,29 @@ pub(crate) async fn select_dendrite_client( Ok(dpd_client) } -pub(crate) async fn select_mg_client( - sagactx: &NexusActionContext, -) -> Result, ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let switch_port = nexus - .get_switch_port(&opctx, params.switch_port_id) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get switch port for mg client selection: {e}" - )) - })?; - - let switch_location: SwitchLocation = - switch_port.switch_location.parse().map_err( - |e: ParseSwitchLocationError| { - ActionError::action_failed(format!( - "get switch location for uplink: {e:?}", - )) - }, - )?; - - let mg_client: Arc = osagactx - .nexus() - .mg_clients - .get(&switch_location) - .ok_or_else(|| { - ActionError::action_failed(format!( - "requested switch not available: {switch_location}" - )) - })? 
- .clone(); - Ok(mg_client) -} - -pub(crate) async fn get_scrimlet_address( - location: SwitchLocation, - nexus: &Arc, -) -> Result { - /* TODO this depends on DNS entries only coming from RSS, it's broken - on the upgrade path - nexus - .resolver() - .await - .lookup_socket_v6(ServiceName::Scrimlet(location)) - .await - .map_err(|e| e.to_string()) - .map_err(|e| { - ActionError::action_failed(format!( - "scrimlet dns lookup failed {e}", - )) - }) - */ - let result = nexus - .resolver() - .await - .lookup_all_ipv6(ServiceName::Dendrite) - .await +async fn spa_ensure_switch_port_bgp_settings( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let settings = sagactx + .lookup::("switch_port_settings") .map_err(|e| { ActionError::action_failed(format!( - "scrimlet dns lookup failed {e}", - )) - }); - - let mappings = match result { - Ok(addrs) => map_switch_zone_addrs(&nexus.log, addrs).await, - Err(e) => { - warn!(nexus.log, "Failed to lookup Dendrite address: {e}"); - return Err(ActionError::action_failed(format!( - "switch mapping failed {e}", - ))); - } - }; - - let addr = match mappings.get(&location) { - Some(addr) => addr, - None => { - return Err(ActionError::action_failed(format!( - "address for switch at location: {location} not found", - ))); - } - }; - - let mut segments = addr.segments(); - segments[7] = 1; - let addr = Ipv6Addr::from(segments); - - Ok(SocketAddrV6::new(addr, SLED_AGENT_PORT, 0, 0)) -} - -#[derive(Clone, Debug)] -pub struct EarlyNetworkPortUpdate { - port: PortConfigV1, - bgp_configs: Vec, -} - -pub(crate) async fn bootstore_update( - nexus: &Arc, - opctx: &OpContext, - switch_port_id: Uuid, - switch_port_name: &str, - settings: &SwitchPortSettingsCombinedResult, -) -> Result { - let switch_port = - nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { - ActionError::action_failed(format!( - "get switch port for uplink: {e}" + "lookup switch port settings: {e}" )) })?; - let switch_location: SwitchLocation = - switch_port.switch_location.parse().map_err( - |e: ParseSwitchLocationError| { - ActionError::action_failed(format!( - "get switch location for uplink: {e:?}", - )) - }, - )?; - - let mut peer_info = Vec::new(); - let mut bgp_configs = Vec::new(); - for p in &settings.bgp_peers { - let bgp_config = nexus - .bgp_config_get(&opctx, p.bgp_config_id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!("get bgp config: {e}")) - })?; - - let announcements = nexus - .bgp_announce_list( - &opctx, - ¶ms::BgpAnnounceSetSelector { - name_or_id: NameOrId::Id(bgp_config.bgp_announce_set_id), - }, - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get bgp announcements: {e}" - )) - })?; - - peer_info.push((p, bgp_config.asn.0)); - bgp_configs.push(BgpConfig { - asn: bgp_config.asn.0, - originate: announcements - .iter() - .filter_map(|a| match a.network { - IpNetwork::V4(net) => Some(net.into()), - //TODO v6 - _ => None, - }) - .collect(), - }); - } - - let update = EarlyNetworkPortUpdate { - port: PortConfigV1 { - routes: settings - .routes - .iter() - .map(|r| RouteConfig { destination: r.dst, nexthop: r.gw.ip() }) - .collect(), - addresses: settings.addresses.iter().map(|a| a.address).collect(), - switch: switch_location, - port: switch_port_name.into(), - uplink_port_fec: settings - .links - .get(0) - .map(|l| l.fec) - .unwrap_or(SwitchLinkFec::None) - .into(), - uplink_port_speed: settings - .links - .get(0) - .map(|l| l.speed) - .unwrap_or(SwitchLinkSpeed::Speed100G) - .into(), - bgp_peers: peer_info - 
.iter() - .filter_map(|(p, asn)| { - //TODO v6 - match p.addr.ip() { - IpAddr::V4(addr) => Some(OmicronBgpPeerConfig { - asn: *asn, - port: switch_port_name.into(), - addr, - hold_time: Some(p.hold_time.0.into()), - connect_retry: Some(p.connect_retry.0.into()), - delay_open: Some(p.delay_open.0.into()), - idle_hold_time: Some(p.idle_hold_time.0.into()), - keepalive: Some(p.keepalive.0.into()), - }), - IpAddr::V6(_) => { - warn!(opctx.log, "IPv6 peers not yet supported"); - None - } - } - }) - .collect(), - }, - bgp_configs, - }; - - Ok(update) -} - -pub(crate) async fn read_bootstore_config( - sa: &sled_agent_client::Client, -) -> Result { - Ok(sa - .read_network_bootstore_config_cache() - .await - .map_err(|e| { - ActionError::action_failed(format!( - "read bootstore network config: {e}" - )) - })? - .into_inner()) -} - -pub(crate) async fn write_bootstore_config( - sa: &sled_agent_client::Client, - config: &EarlyNetworkConfig, -) -> Result<(), ActionError> { - sa.write_network_bootstore_config(config).await.map_err(|e| { - ActionError::action_failed(format!( - "write bootstore network config: {e}" - )) - })?; - Ok(()) -} - -#[derive(Clone, Debug, Default)] -pub(crate) struct BootstoreNetworkPortChange { - previous_port_config: Option, - changed_bgp_configs: Vec, - added_bgp_configs: Vec, -} - -pub(crate) fn apply_bootstore_update( - config: &mut EarlyNetworkConfig, - update: &EarlyNetworkPortUpdate, -) -> Result { - let mut change = BootstoreNetworkPortChange::default(); - - let rack_net_config = match &mut config.body.rack_network_config { - Some(cfg) => cfg, - None => { - return Err(ActionError::action_failed( - "rack network config not yet initialized".to_string(), - )) - } - }; - - for port in &mut rack_net_config.ports { - if port.port == update.port.port { - change.previous_port_config = Some(port.clone()); - *port = update.port.clone(); - break; - } - } - if change.previous_port_config.is_none() { - rack_net_config.ports.push(update.port.clone()); - } - - for updated_bgp in &update.bgp_configs { - let mut exists = false; - for resident_bgp in &mut rack_net_config.bgp { - if resident_bgp.asn == updated_bgp.asn { - change.changed_bgp_configs.push(resident_bgp.clone()); - *resident_bgp = updated_bgp.clone(); - exists = true; - break; - } - } - if !exists { - change.added_bgp_configs.push(updated_bgp.clone()); - } - } - rack_net_config.bgp.extend_from_slice(&change.added_bgp_configs); - - Ok(change) -} + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); -pub(crate) async fn switch_sled_agent( - location: SwitchLocation, - sagactx: &NexusActionContext, -) -> Result { - let nexus = sagactx.user_data().nexus(); - let sled_agent_addr = get_scrimlet_address(location, nexus).await?; - Ok(sled_agent_client::Client::new( - &format!("http://{}", sled_agent_addr), - sagactx.user_data().log().clone(), - )) + ensure_switch_port_bgp_settings( + sagactx, + &opctx, + settings, + params.switch_port_name.clone(), + params.switch_port_id, + ) + .await } diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs index 1ab2f6be0c..bcbd5bf894 100644 --- a/nexus/src/app/sagas/switch_port_settings_clear.rs +++ b/nexus/src/app/sagas/switch_port_settings_clear.rs @@ -5,7 +5,7 @@ use super::switch_port_settings_apply::select_dendrite_client; use super::{NexusActionContext, NEXUS_DPD_TAG}; use crate::app::sagas::retry_until_known_result; -use 
crate::app::sagas::switch_port_settings_apply::{ +use crate::app::sagas::switch_port_settings_common::{ api_to_dpd_port_settings, apply_bootstore_update, bootstore_update, ensure_switch_port_bgp_settings, ensure_switch_port_uplink, read_bootstore_config, select_mg_client, switch_sled_agent, @@ -214,7 +214,20 @@ async fn spa_undo_clear_switch_port_settings( async fn spa_clear_switch_port_uplink( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - ensure_switch_port_uplink(sagactx, true, None).await + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + ensure_switch_port_uplink( + sagactx, + &opctx, + true, + None, + params.switch_port_id, + params.port_name.clone(), + ) + .await } async fn spa_undo_clear_switch_port_uplink( @@ -223,8 +236,21 @@ async fn spa_undo_clear_switch_port_uplink( let id = sagactx .lookup::>("original_switch_port_settings_id") .map_err(|e| external::Error::internal_error(&e.to_string()))?; + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); - Ok(ensure_switch_port_uplink(sagactx, false, id).await?) + Ok(ensure_switch_port_uplink( + sagactx, + &opctx, + false, + id, + params.switch_port_id, + params.port_name.clone(), + ) + .await?) } async fn spa_clear_switch_port_bgp_settings( @@ -257,9 +283,13 @@ async fn spa_clear_switch_port_bgp_settings( .map_err(ActionError::action_failed)?; let mg_client: Arc = - select_mg_client(&sagactx).await.map_err(|e| { - ActionError::action_failed(format!("select mg client (undo): {e}")) - })?; + select_mg_client(&sagactx, &opctx, params.switch_port_id) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "select mg client (undo): {e}" + )) + })?; for peer in settings.bgp_peers { let config = nexus @@ -306,7 +336,14 @@ async fn spa_undo_clear_switch_port_bgp_settings( let settings = nexus.switch_port_settings_get(&opctx, &NameOrId::Id(id)).await?; - Ok(ensure_switch_port_bgp_settings(sagactx, settings).await?) + Ok(ensure_switch_port_bgp_settings( + sagactx, + &opctx, + settings, + params.port_name.clone(), + params.switch_port_id, + ) + .await?) 
} async fn spa_clear_switch_port_bootstore_network_settings( diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs new file mode 100644 index 0000000000..8e66aa12f8 --- /dev/null +++ b/nexus/src/app/sagas/switch_port_settings_common.rs @@ -0,0 +1,577 @@ +use super::NexusActionContext; +use crate::app::map_switch_zone_addrs; +use crate::Nexus; +use db::datastore::SwitchPortSettingsCombinedResult; +use dpd_client::types::{ + LinkCreate, LinkId, LinkSettings, PortFec, PortSettings, PortSpeed, + RouteSettingsV4, RouteSettingsV6, +}; +use dpd_client::{Ipv4Cidr, Ipv6Cidr}; +use internal_dns::ServiceName; +use ipnetwork::IpNetwork; +use mg_admin_client::types::Prefix4; +use mg_admin_client::types::{ApplyRequest, BgpPeerConfig}; +use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed}; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db; +use nexus_types::external_api::params; +use omicron_common::address::SLED_AGENT_PORT; +use omicron_common::api::external::NameOrId; +use omicron_common::api::internal::shared::{ + ParseSwitchLocationError, SwitchLocation, +}; +use sled_agent_client::types::PortConfigV1; +use sled_agent_client::types::RouteConfig; +use sled_agent_client::types::{BgpConfig, EarlyNetworkConfig}; +use sled_agent_client::types::{ + BgpPeerConfig as OmicronBgpPeerConfig, HostPortConfig, +}; +use std::collections::HashMap; +use std::net::SocketAddrV6; +use std::net::{IpAddr, Ipv6Addr}; +use std::sync::Arc; +use steno::ActionError; +use uuid::Uuid; + +// This is more of an implementation detail of the BGP implementation. It +// defines the maximum time the peering engine will wait for external messages +// before breaking to check for shutdown conditions. +const BGP_SESSION_RESOLUTION: u64 = 100; + +pub(crate) fn api_to_dpd_port_settings( + settings: &SwitchPortSettingsCombinedResult, +) -> Result { + let mut dpd_port_settings = PortSettings { + links: HashMap::new(), + v4_routes: HashMap::new(), + v6_routes: HashMap::new(), + }; + + //TODO breakouts + let link_id = LinkId(0); + + for l in settings.links.iter() { + dpd_port_settings.links.insert( + link_id.to_string(), + LinkSettings { + params: LinkCreate { + autoneg: false, + lane: Some(LinkId(0)), + kr: false, + fec: match l.fec { + SwitchLinkFec::Firecode => PortFec::Firecode, + SwitchLinkFec::Rs => PortFec::Rs, + SwitchLinkFec::None => PortFec::None, + }, + speed: match l.speed { + SwitchLinkSpeed::Speed0G => PortSpeed::Speed0G, + SwitchLinkSpeed::Speed1G => PortSpeed::Speed1G, + SwitchLinkSpeed::Speed10G => PortSpeed::Speed10G, + SwitchLinkSpeed::Speed25G => PortSpeed::Speed25G, + SwitchLinkSpeed::Speed40G => PortSpeed::Speed40G, + SwitchLinkSpeed::Speed50G => PortSpeed::Speed50G, + SwitchLinkSpeed::Speed100G => PortSpeed::Speed100G, + SwitchLinkSpeed::Speed200G => PortSpeed::Speed200G, + SwitchLinkSpeed::Speed400G => PortSpeed::Speed400G, + }, + }, + //TODO won't work for breakouts + addrs: settings + .addresses + .iter() + .map(|a| a.address.ip()) + .collect(), + }, + ); + } + + for r in &settings.routes { + match &r.dst { + IpNetwork::V4(n) => { + let gw = match r.gw.ip() { + IpAddr::V4(gw) => gw, + IpAddr::V6(_) => { + return Err( + "IPv4 destination cannot have IPv6 nexthop".into() + ) + } + }; + dpd_port_settings.v4_routes.insert( + Ipv4Cidr { prefix: n.ip(), prefix_len: n.prefix() } + .to_string(), + vec![RouteSettingsV4 { link_id: link_id.0, nexthop: gw }], + ); + } + IpNetwork::V6(n) => { + let gw = match r.gw.ip() { + IpAddr::V6(gw) => gw, + 
IpAddr::V4(_) => { + return Err( + "IPv6 destination cannot have IPv4 nexthop".into() + ) + } + }; + dpd_port_settings.v6_routes.insert( + Ipv6Cidr { prefix: n.ip(), prefix_len: n.prefix() } + .to_string(), + vec![RouteSettingsV6 { link_id: link_id.0, nexthop: gw }], + ); + } + } + } + + Ok(dpd_port_settings) +} + +pub(crate) fn apply_bootstore_update( + config: &mut EarlyNetworkConfig, + update: &EarlyNetworkPortUpdate, +) -> Result { + let mut change = BootstoreNetworkPortChange::default(); + + let rack_net_config = match &mut config.body.rack_network_config { + Some(cfg) => cfg, + None => { + return Err(ActionError::action_failed( + "rack network config not yet initialized".to_string(), + )) + } + }; + + for port in &mut rack_net_config.ports { + if port.port == update.port.port { + change.previous_port_config = Some(port.clone()); + *port = update.port.clone(); + break; + } + } + if change.previous_port_config.is_none() { + rack_net_config.ports.push(update.port.clone()); + } + + for updated_bgp in &update.bgp_configs { + let mut exists = false; + for resident_bgp in &mut rack_net_config.bgp { + if resident_bgp.asn == updated_bgp.asn { + change.changed_bgp_configs.push(resident_bgp.clone()); + *resident_bgp = updated_bgp.clone(); + exists = true; + break; + } + } + if !exists { + change.added_bgp_configs.push(updated_bgp.clone()); + } + } + rack_net_config.bgp.extend_from_slice(&change.added_bgp_configs); + + Ok(change) +} + +pub(crate) async fn bootstore_update( + nexus: &Arc, + opctx: &OpContext, + switch_port_id: Uuid, + switch_port_name: &str, + settings: &SwitchPortSettingsCombinedResult, +) -> Result { + let switch_port = + nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { + ActionError::action_failed(format!( + "get switch port for uplink: {e}" + )) + })?; + + let switch_location: SwitchLocation = + switch_port.switch_location.parse().map_err( + |e: ParseSwitchLocationError| { + ActionError::action_failed(format!( + "get switch location for uplink: {e:?}", + )) + }, + )?; + + let mut peer_info = Vec::new(); + let mut bgp_configs = Vec::new(); + for p in &settings.bgp_peers { + let bgp_config = nexus + .bgp_config_get(&opctx, p.bgp_config_id.into()) + .await + .map_err(|e| { + ActionError::action_failed(format!("get bgp config: {e}")) + })?; + + let announcements = nexus + .bgp_announce_list( + &opctx, + ¶ms::BgpAnnounceSetSelector { + name_or_id: NameOrId::Id(bgp_config.bgp_announce_set_id), + }, + ) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "get bgp announcements: {e}" + )) + })?; + + peer_info.push((p, bgp_config.asn.0)); + bgp_configs.push(BgpConfig { + asn: bgp_config.asn.0, + originate: announcements + .iter() + .filter_map(|a| match a.network { + IpNetwork::V4(net) => Some(net.into()), + //TODO v6 + _ => None, + }) + .collect(), + }); + } + + let update = EarlyNetworkPortUpdate { + port: PortConfigV1 { + routes: settings + .routes + .iter() + .map(|r| RouteConfig { destination: r.dst, nexthop: r.gw.ip() }) + .collect(), + addresses: settings.addresses.iter().map(|a| a.address).collect(), + switch: switch_location, + port: switch_port_name.into(), + uplink_port_fec: settings + .links + .get(0) + .map(|l| l.fec) + .unwrap_or(SwitchLinkFec::None) + .into(), + uplink_port_speed: settings + .links + .get(0) + .map(|l| l.speed) + .unwrap_or(SwitchLinkSpeed::Speed100G) + .into(), + bgp_peers: peer_info + .iter() + .filter_map(|(p, asn)| { + //TODO v6 + match p.addr.ip() { + IpAddr::V4(addr) => Some(OmicronBgpPeerConfig { + asn: *asn, + 
port: switch_port_name.into(), + addr, + hold_time: Some(p.hold_time.0.into()), + connect_retry: Some(p.connect_retry.0.into()), + delay_open: Some(p.delay_open.0.into()), + idle_hold_time: Some(p.idle_hold_time.0.into()), + keepalive: Some(p.keepalive.0.into()), + }), + IpAddr::V6(_) => { + warn!(opctx.log, "IPv6 peers not yet supported"); + None + } + } + }) + .collect(), + }, + bgp_configs, + }; + + Ok(update) +} + +pub(crate) async fn ensure_switch_port_uplink( + sagactx: NexusActionContext, + opctx: &OpContext, + skip_self: bool, + inject: Option, + switch_port_id: Uuid, + switch_port_name: String, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let nexus = osagactx.nexus(); + + let switch_port = + nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { + ActionError::action_failed(format!( + "get switch port for uplink: {e}" + )) + })?; + + let switch_location: SwitchLocation = + switch_port.switch_location.parse().map_err(|e| { + ActionError::action_failed(format!( + "get switch location for uplink: {e:?}", + )) + })?; + + let mut uplinks: Vec = Vec::new(); + + // The sled agent uplinks interface is an all or nothing interface, so we + // need to get all the uplink configs for all the ports. + let active_ports = + nexus.active_port_settings(&opctx).await.map_err(|e| { + ActionError::action_failed(format!( + "get active switch port settings: {e}" + )) + })?; + + for (port, info) in &active_ports { + // Since we are undoing establishing uplinks for the settings + // associated with this port we skip adding this ports uplinks + // to the list - effectively removing them. + if skip_self && port.id == switch_port.id { + continue; + } + uplinks.push(HostPortConfig { + port: port.port_name.clone(), + addrs: info.addresses.iter().map(|a| a.address).collect(), + }) + } + + if let Some(id) = inject { + let settings = nexus + .switch_port_settings_get(&opctx, &id.into()) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "get switch port settings for injection: {e}" + )) + })?; + uplinks.push(HostPortConfig { + port: switch_port_name.clone(), + addrs: settings.addresses.iter().map(|a| a.address).collect(), + }) + } + + let sc = switch_sled_agent(switch_location, &sagactx).await?; + sc.uplink_ensure(&sled_agent_client::types::SwitchPorts { uplinks }) + .await + .map_err(|e| { + ActionError::action_failed(format!("ensure uplink: {e}")) + })?; + + Ok(()) +} + +pub(crate) async fn read_bootstore_config( + sa: &sled_agent_client::Client, +) -> Result { + Ok(sa + .read_network_bootstore_config_cache() + .await + .map_err(|e| { + ActionError::action_failed(format!( + "read bootstore network config: {e}" + )) + })? 
+ .into_inner()) +} + +pub(crate) async fn write_bootstore_config( + sa: &sled_agent_client::Client, + config: &EarlyNetworkConfig, +) -> Result<(), ActionError> { + sa.write_network_bootstore_config(config).await.map_err(|e| { + ActionError::action_failed(format!( + "write bootstore network config: {e}" + )) + })?; + Ok(()) +} + +pub(crate) async fn select_mg_client( + sagactx: &NexusActionContext, + opctx: &OpContext, + switch_port_id: Uuid, +) -> Result, ActionError> { + let osagactx = sagactx.user_data(); + let nexus = osagactx.nexus(); + + let switch_port = + nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { + ActionError::action_failed(format!( + "get switch port for mg client selection: {e}" + )) + })?; + + let switch_location: SwitchLocation = + switch_port.switch_location.parse().map_err( + |e: ParseSwitchLocationError| { + ActionError::action_failed(format!( + "get switch location for uplink: {e:?}", + )) + }, + )?; + + let mg_client: Arc = osagactx + .nexus() + .mg_clients + .get(&switch_location) + .ok_or_else(|| { + ActionError::action_failed(format!( + "requested switch not available: {switch_location}" + )) + })? + .clone(); + Ok(mg_client) +} + +pub(crate) async fn switch_sled_agent( + location: SwitchLocation, + sagactx: &NexusActionContext, +) -> Result { + let nexus = sagactx.user_data().nexus(); + let sled_agent_addr = get_scrimlet_address(location, nexus).await?; + Ok(sled_agent_client::Client::new( + &format!("http://{}", sled_agent_addr), + sagactx.user_data().log().clone(), + )) +} + +pub(crate) async fn ensure_switch_port_bgp_settings( + sagactx: NexusActionContext, + opctx: &OpContext, + settings: SwitchPortSettingsCombinedResult, + switch_port_name: String, + switch_port_id: Uuid, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let nexus = osagactx.nexus(); + let mg_client: Arc = + select_mg_client(&sagactx, opctx, switch_port_id).await.map_err( + |e| ActionError::action_failed(format!("select mg client: {e}")), + )?; + + let mut bgp_peer_configs = Vec::new(); + + for peer in settings.bgp_peers { + let config = nexus + .bgp_config_get(&opctx, peer.bgp_config_id.into()) + .await + .map_err(|e| { + ActionError::action_failed(format!("get bgp config: {e}")) + })?; + + let announcements = nexus + .bgp_announce_list( + &opctx, + ¶ms::BgpAnnounceSetSelector { + name_or_id: NameOrId::Id(config.bgp_announce_set_id), + }, + ) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "get bgp announcements: {e}" + )) + })?; + + let mut prefixes = Vec::new(); + for a in &announcements { + let value = match a.network.ip() { + IpAddr::V4(value) => Ok(value), + IpAddr::V6(_) => Err(ActionError::action_failed( + "IPv6 announcement not yet supported".to_string(), + )), + }?; + prefixes.push(Prefix4 { value, length: a.network.prefix() }); + } + + let bpc = BgpPeerConfig { + asn: *config.asn, + name: format!("{}", peer.addr.ip()), //TODO user defined name? 
+ host: format!("{}:179", peer.addr.ip()), + hold_time: peer.hold_time.0.into(), + idle_hold_time: peer.idle_hold_time.0.into(), + delay_open: peer.delay_open.0.into(), + connect_retry: peer.connect_retry.0.into(), + keepalive: peer.keepalive.0.into(), + resolution: BGP_SESSION_RESOLUTION, + originate: prefixes, + }; + + bgp_peer_configs.push(bpc); + } + + mg_client + .inner + .bgp_apply(&ApplyRequest { + peer_group: switch_port_name, + peers: bgp_peer_configs, + }) + .await + .map_err(|e| { + ActionError::action_failed(format!("apply bgp settings: {e}")) + })?; + + Ok(()) +} + +pub(crate) async fn get_scrimlet_address( + location: SwitchLocation, + nexus: &Arc, +) -> Result { + /* TODO this depends on DNS entries only coming from RSS, it's broken + on the upgrade path + nexus + .resolver() + .await + .lookup_socket_v6(ServiceName::Scrimlet(location)) + .await + .map_err(|e| e.to_string()) + .map_err(|e| { + ActionError::action_failed(format!( + "scrimlet dns lookup failed {e}", + )) + }) + */ + let result = nexus + .resolver() + .await + .lookup_all_ipv6(ServiceName::Dendrite) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "scrimlet dns lookup failed {e}", + )) + }); + + let mappings = match result { + Ok(addrs) => map_switch_zone_addrs(&nexus.log, addrs).await, + Err(e) => { + warn!(nexus.log, "Failed to lookup Dendrite address: {e}"); + return Err(ActionError::action_failed(format!( + "switch mapping failed {e}", + ))); + } + }; + + let addr = match mappings.get(&location) { + Some(addr) => addr, + None => { + return Err(ActionError::action_failed(format!( + "address for switch at location: {location} not found", + ))); + } + }; + + let mut segments = addr.segments(); + segments[7] = 1; + let addr = Ipv6Addr::from(segments); + + Ok(SocketAddrV6::new(addr, SLED_AGENT_PORT, 0, 0)) +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct BootstoreNetworkPortChange { + previous_port_config: Option, + changed_bgp_configs: Vec, + added_bgp_configs: Vec, +} + +#[derive(Clone, Debug)] +pub struct EarlyNetworkPortUpdate { + port: PortConfigV1, + bgp_configs: Vec, +} diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index ccd0b50fbe..d163fc6b06 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -318,4 +318,19 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .execute() .await .unwrap(); + + // clear port settings + + NexusRequest::new( + RequestBuilder::new( + client, + Method::DELETE, + &format!("/v1/system/hardware/switch-port/qsfp0/settings?rack_id={rack_id}&switch_location=switch0"), + ) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); } From 15e307e3d2f8679b397423eb5a81ca1c9f3635bd Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 21 Nov 2023 12:22:34 -0800 Subject: [PATCH 008/186] [nexus] Add `RotUpdater` (#4502) This is analogous to #4427, and like it, we only add this type without attempting any integration into Nexus-at-large. Hopefully this PR looks bigger than it really is; the majority of changes are either: * Copy/paste additions from #4427 and change the details to make them relevant for RoT updates instead of SP updates (the tests are a particularly egregious case of this, but I think it makes sense to have duplication here as opposed to trying to make them too general?) 
* Refactoring to extract identical / near-identical bits after step 1 (most of this landed in the new `MgsClients` type) --- .../tests/output/collector_basic.txt | 20 +- .../tests/output/collector_errors.txt | 20 +- nexus/src/app/test_interfaces.rs | 3 + nexus/src/app/update/mgs_clients.rs | 240 +++++++ nexus/src/app/update/mod.rs | 15 +- nexus/src/app/update/rot_updater.rs | 272 ++++++++ nexus/src/app/update/sp_updater.rs | 266 ++------ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/rot_updater.rs | 627 ++++++++++++++++++ nexus/tests/integration_tests/sp_updater.rs | 59 +- sp-sim/src/gimlet.rs | 34 +- sp-sim/src/lib.rs | 11 +- sp-sim/src/sidecar.rs | 34 +- sp-sim/src/update.rs | 44 +- wicket-common/src/update_events.rs | 14 +- wicketd/src/update_tracker.rs | 176 +++-- wicketd/tests/integration_tests/updates.rs | 10 +- 17 files changed, 1489 insertions(+), 357 deletions(-) create mode 100644 nexus/src/app/update/mgs_clients.rs create mode 100644 nexus/src/app/update/rot_updater.rs create mode 100644 nexus/tests/integration_tests/rot_updater.rs diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index 4a3bf62d63..76b929bfba 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -5,9 +5,9 @@ baseboards: part "FAKE_SIM_SIDECAR" serial "SimSidecar1" cabooses: - board "SimGimletRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff" - board "SimSidecarRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" + board "SimRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" + board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" SPs: @@ -31,13 +31,13 @@ cabooses found: SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletRot" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletRot" - RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarRot" - RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletRot" - RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarRot" - RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarRot" + RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" errors: diff --git 
a/nexus/inventory/tests/output/collector_errors.txt b/nexus/inventory/tests/output/collector_errors.txt index 4404046253..c61d2e7c29 100644 --- a/nexus/inventory/tests/output/collector_errors.txt +++ b/nexus/inventory/tests/output/collector_errors.txt @@ -5,9 +5,9 @@ baseboards: part "FAKE_SIM_SIDECAR" serial "SimSidecar1" cabooses: - board "SimGimletRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff" - board "SimSidecarRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" + board "SimRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" + board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" SPs: @@ -31,14 +31,14 @@ cabooses found: SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletRot" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletRot" - RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarRot" - RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletRot" - RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarRot" - RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarRot" + RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" errors: error: MGS "http://[100::1]:12345": listing ignition targets: Communication Error <> diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index ad2ea50e07..6161a9a1c1 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -10,6 +10,9 @@ use sled_agent_client::Client as SledAgentClient; use std::sync::Arc; use uuid::Uuid; +pub use super::update::MgsClients; +pub use super::update::RotUpdateError; +pub use super::update::RotUpdater; pub use super::update::SpUpdateError; pub use super::update::SpUpdater; pub use super::update::UpdateProgress; diff --git a/nexus/src/app/update/mgs_clients.rs b/nexus/src/app/update/mgs_clients.rs new file mode 100644 index 0000000000..5915505829 --- /dev/null +++ b/nexus/src/app/update/mgs_clients.rs @@ -0,0 +1,240 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Module providing support for handling failover between multiple MGS clients
+
+use futures::Future;
+use gateway_client::types::SpType;
+use gateway_client::types::SpUpdateStatus;
+use gateway_client::Client;
+use slog::Logger;
+use std::collections::VecDeque;
+use std::sync::Arc;
+use uuid::Uuid;
+
+pub(super) type GatewayClientError =
+    gateway_client::Error<gateway_client::types::Error>;
+
+pub(super) enum PollUpdateStatus {
+    Preparing { progress: Option<f64> },
+    InProgress { progress: Option<f64> },
+    Complete,
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum UpdateStatusError {
+    #[error("different update is now preparing ({0})")]
+    DifferentUpdatePreparing(Uuid),
+    #[error("different update is now in progress ({0})")]
+    DifferentUpdateInProgress(Uuid),
+    #[error("different update is now complete ({0})")]
+    DifferentUpdateComplete(Uuid),
+    #[error("different update is now aborted ({0})")]
+    DifferentUpdateAborted(Uuid),
+    #[error("different update failed ({0})")]
+    DifferentUpdateFailed(Uuid),
+    #[error("update status lost (did the SP reset?)")]
+    UpdateStatusLost,
+    #[error("update was aborted")]
+    UpdateAborted,
+    #[error("update failed (error code {0})")]
+    UpdateFailedWithCode(u32),
+    #[error("update failed (error message {0})")]
+    UpdateFailedWithMessage(String),
+}
+
+#[derive(Debug, thiserror::Error)]
+pub(super) enum PollUpdateStatusError {
+    #[error(transparent)]
+    StatusError(#[from] UpdateStatusError),
+    #[error(transparent)]
+    ClientError(#[from] GatewayClientError),
+}
+
+#[derive(Debug, Clone)]
+pub struct MgsClients {
+    clients: VecDeque<Arc<Client>>,
+}
+
+impl MgsClients {
+    /// Create a new `MgsClients` with the given `clients`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `clients` is empty.
+    pub fn new<T: Into<VecDeque<Arc<Client>>>>(clients: T) -> Self {
+        let clients = clients.into();
+        assert!(!clients.is_empty());
+        Self { clients }
+    }
+
+    /// Create a new `MgsClients` with the given `clients`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `clients` is empty.
+    pub fn from_clients<I: IntoIterator<Item = Client>>(clients: I) -> Self {
+        let clients = clients
+            .into_iter()
+            .map(Arc::new)
+            .collect::<VecDeque<_>>();
+        Self::new(clients)
+    }
+
+    /// Run `op` against all clients in sequence until either one succeeds (in
+    /// which case the success value is returned), one fails with a
+    /// non-communication error (in which case that error is returned), or all
+    /// of them fail with communication errors (in which case the communication
+    /// error from the last-attempted client is returned).
+    ///
+    /// On a successful return, the internal client list will be reordered so
+    /// any future accesses will attempt the most-recently-successful client.
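+    ///
+    /// A rough usage sketch (the bindings here are illustrative; the real
+    /// call sites live in `SpUpdater` and `RotUpdater`):
+    ///
+    /// ```ignore
+    /// let status = mgs_clients
+    ///     .try_all_serially(&log, |client| async move {
+    ///         client
+    ///             .sp_component_update_status(sp_type, slot, component)
+    ///             .await
+    ///     })
+    ///     .await?;
+    /// ```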
+    pub(super) async fn try_all_serially<T, F, Fut>(
+        &mut self,
+        log: &Logger,
+        op: F,
+    ) -> Result<T, GatewayClientError>
+    where
+        // Seems like it would be nicer to take `&Client` here instead of
+        // needing to clone each `Arc`, but there's currently no decent way of
+        // doing that without boxing the returned future:
+        // https://users.rust-lang.org/t/how-to-express-that-the-future-returned-by-a-closure-lives-only-as-long-as-its-argument/90039/10
+        F: Fn(Arc<Client>) -> Fut,
+        Fut: Future<Output = Result<T, GatewayClientError>>,
+    {
+        let mut last_err = None;
+        for (i, client) in self.clients.iter().enumerate() {
+            match op(Arc::clone(client)).await {
+                Ok(value) => {
+                    self.clients.rotate_left(i);
+                    return Ok(value);
+                }
+                Err(GatewayClientError::CommunicationError(err)) => {
+                    if i + 1 < self.clients.len() {
+                        warn!(
+                            log, "communication error with MGS; \
+                                  will try next client";
+                            "mgs_addr" => client.baseurl(),
+                            "err" => %err,
+                        );
+                    }
+                    last_err = Some(err);
+                    continue;
+                }
+                Err(err) => return Err(err),
+            }
+        }
+
+        // The only way to get here is if all clients failed with communication
+        // errors. Return the error from the last MGS we tried.
+        Err(GatewayClientError::CommunicationError(last_err.unwrap()))
+    }
+
+    /// Poll for the status of an expected-to-be-in-progress update.
+    pub(super) async fn poll_update_status(
+        &mut self,
+        sp_type: SpType,
+        sp_slot: u32,
+        component: &'static str,
+        update_id: Uuid,
+        log: &Logger,
+    ) -> Result<PollUpdateStatus, PollUpdateStatusError> {
+        let update_status = self
+            .try_all_serially(log, |client| async move {
+                let update_status = client
+                    .sp_component_update_status(sp_type, sp_slot, component)
+                    .await?;
+
+                debug!(
+                    log, "got update status";
+                    "mgs_addr" => client.baseurl(),
+                    "status" => ?update_status,
+                );
+
+                Ok(update_status)
+            })
+            .await?
+            .into_inner();
+
+        match update_status {
+            SpUpdateStatus::Preparing { id, progress } => {
+                if id == update_id {
+                    let progress = progress.and_then(|progress| {
+                        if progress.current > progress.total {
+                            warn!(
+                                log, "nonsense preparing progress";
+                                "current" => progress.current,
+                                "total" => progress.total,
+                            );
+                            None
+                        } else if progress.total == 0 {
+                            None
+                        } else {
+                            Some(
+                                f64::from(progress.current)
+                                    / f64::from(progress.total),
+                            )
+                        }
+                    });
+                    Ok(PollUpdateStatus::Preparing { progress })
+                } else {
+                    Err(UpdateStatusError::DifferentUpdatePreparing(id).into())
+                }
+            }
+            SpUpdateStatus::InProgress { id, bytes_received, total_bytes } => {
+                if id == update_id {
+                    let progress = if bytes_received > total_bytes {
+                        warn!(
+                            log, "nonsense update progress";
+                            "bytes_received" => bytes_received,
+                            "total_bytes" => total_bytes,
+                        );
+                        None
+                    } else if total_bytes == 0 {
+                        None
+                    } else {
+                        Some(f64::from(bytes_received) / f64::from(total_bytes))
+                    };
+                    Ok(PollUpdateStatus::InProgress { progress })
+                } else {
+                    Err(UpdateStatusError::DifferentUpdateInProgress(id).into())
+                }
+            }
+            SpUpdateStatus::Complete { id } => {
+                if id == update_id {
+                    Ok(PollUpdateStatus::Complete)
+                } else {
+                    Err(UpdateStatusError::DifferentUpdateComplete(id).into())
+                }
+            }
+            SpUpdateStatus::None => {
+                Err(UpdateStatusError::UpdateStatusLost.into())
+            }
+            SpUpdateStatus::Aborted { id } => {
+                if id == update_id {
+                    Err(UpdateStatusError::UpdateAborted.into())
+                } else {
+                    Err(UpdateStatusError::DifferentUpdateAborted(id).into())
+                }
+            }
+            SpUpdateStatus::Failed { code, id } => {
+                if id == update_id {
+                    Err(UpdateStatusError::UpdateFailedWithCode(code).into())
+                } else {
+                    Err(UpdateStatusError::DifferentUpdateFailed(id).into())
+                }
+            }
+            SpUpdateStatus::RotError { id, message } => {
+                if id == update_id {
+                    Err(UpdateStatusError::UpdateFailedWithMessage(format!(
+                        "rot error: {message}"
+                    ))
+                    .into())
+                } else {
+                    Err(UpdateStatusError::DifferentUpdateFailed(id).into())
+                }
+            }
+        }
+    }
+}
diff --git a/nexus/src/app/update/mod.rs b/nexus/src/app/update/mod.rs
index 165a6ae23b..7d5c642822 100644
--- a/nexus/src/app/update/mod.rs
+++ b/nexus/src/app/update/mod.rs
@@ -26,9 +26,22 @@ use std::path::Path;
 use tokio::io::AsyncWriteExt;
 use uuid::Uuid;
 
+mod mgs_clients;
+mod rot_updater;
 mod sp_updater;
 
-pub use sp_updater::{SpUpdateError, SpUpdater, UpdateProgress};
+pub use mgs_clients::{MgsClients, UpdateStatusError};
+pub use rot_updater::{RotUpdateError, RotUpdater};
+pub use sp_updater::{SpUpdateError, SpUpdater};
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum UpdateProgress {
+    Started,
+    Preparing { progress: Option<f64> },
+    InProgress { progress: Option<f64> },
+    Complete,
+    Failed(String),
+}
 
 static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts";
 
diff --git a/nexus/src/app/update/rot_updater.rs b/nexus/src/app/update/rot_updater.rs
new file mode 100644
index 0000000000..d7d21e3b3a
--- /dev/null
+++ b/nexus/src/app/update/rot_updater.rs
@@ -0,0 +1,272 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Module containing types for updating RoTs via MGS.
+
+use super::mgs_clients::PollUpdateStatusError;
+use super::MgsClients;
+use super::UpdateProgress;
+use super::UpdateStatusError;
+use crate::app::update::mgs_clients::PollUpdateStatus;
+use gateway_client::types::RotSlot;
+use gateway_client::types::SpComponentFirmwareSlot;
+use gateway_client::types::SpType;
+use gateway_client::SpComponent;
+use slog::Logger;
+use std::time::Duration;
+use tokio::sync::watch;
+use uuid::Uuid;
+
+type GatewayClientError = gateway_client::Error<gateway_client::types::Error>;
+
+#[derive(Debug, thiserror::Error)]
+pub enum RotUpdateError {
+    #[error("error communicating with MGS")]
+    MgsCommunication(#[from] GatewayClientError),
+
+    #[error("failed checking update status: {0}")]
+    PollUpdateStatus(#[from] UpdateStatusError),
+}
+
+impl From<PollUpdateStatusError> for RotUpdateError {
+    fn from(err: PollUpdateStatusError) -> Self {
+        match err {
+            PollUpdateStatusError::StatusError(err) => err.into(),
+            PollUpdateStatusError::ClientError(err) => err.into(),
+        }
+    }
+}
+
+pub struct RotUpdater {
+    log: Logger,
+    progress: watch::Sender<Option<UpdateProgress>>,
+    sp_type: SpType,
+    sp_slot: u32,
+    target_rot_slot: RotSlot,
+    update_id: Uuid,
+    // TODO-clarity maybe a newtype for this? TBD how we get this from
+    // wherever it's stored, which might give us a stronger type already.
+    rot_hubris_archive: Vec<u8>,
+}
+
+impl RotUpdater {
+    pub fn new(
+        sp_type: SpType,
+        sp_slot: u32,
+        target_rot_slot: RotSlot,
+        update_id: Uuid,
+        rot_hubris_archive: Vec<u8>,
+        log: &Logger,
+    ) -> Self {
+        let log = log.new(slog::o!(
+            "component" => "RotUpdater",
+            "sp_type" => format!("{sp_type:?}"),
+            "sp_slot" => sp_slot,
+            "target_rot_slot" => format!("{target_rot_slot:?}"),
+            "update_id" => format!("{update_id}"),
+        ));
+        let progress = watch::Sender::new(None);
+        Self {
+            log,
+            progress,
+            sp_type,
+            sp_slot,
+            target_rot_slot,
+            update_id,
+            rot_hubris_archive,
+        }
+    }
+
+    pub fn progress_watcher(&self) -> watch::Receiver<Option<UpdateProgress>> {
+        self.progress.subscribe()
+    }
+
+    /// Drive this RoT update to completion (or failure).
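+    ///
+    /// A minimal sketch of a caller (illustrative only; the archive bytes
+    /// and `mgs_clients` are assumed to come from elsewhere):
+    ///
+    /// ```ignore
+    /// let updater = RotUpdater::new(
+    ///     SpType::Sled,
+    ///     0,
+    ///     RotSlot::B,
+    ///     Uuid::new_v4(),
+    ///     rot_hubris_archive,
+    ///     &log,
+    /// );
+    /// let mut progress = updater.progress_watcher();
+    /// updater.update(mgs_clients).await?;
+    /// ```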
+ /// + /// Only one MGS instance is required to drive an update; however, if + /// multiple MGS instances are available and passed to this method and an + /// error occurs communicating with one instance, `RotUpdater` will try the + /// remaining instances before failing. + pub async fn update( + self, + mut mgs_clients: MgsClients, + ) -> Result<(), RotUpdateError> { + // The async blocks below want `&self` references, but we take `self` + // for API clarity (to start a new update, the caller should construct a + // new updater). Create a `&self` ref that we use through the remainder + // of this method. + let me = &self; + + mgs_clients + .try_all_serially(&self.log, |client| async move { + me.start_update_one_mgs(&client).await + }) + .await?; + + // `wait_for_update_completion` uses `try_all_mgs_clients` internally, + // so we don't wrap it here. + me.wait_for_update_completion(&mut mgs_clients).await?; + + mgs_clients + .try_all_serially(&self.log, |client| async move { + me.mark_target_slot_active_one_mgs(&client).await + }) + .await?; + + mgs_clients + .try_all_serially(&self.log, |client| async move { + me.finalize_update_via_reset_one_mgs(&client).await + }) + .await?; + + // wait for any progress watchers to be dropped before we return; + // otherwise, they'll get `RecvError`s when trying to check the current + // status + self.progress.closed().await; + + Ok(()) + } + + async fn start_update_one_mgs( + &self, + client: &gateway_client::Client, + ) -> Result<(), GatewayClientError> { + let firmware_slot = self.target_rot_slot.as_u16(); + + // Start the update. + client + .sp_component_update( + self.sp_type, + self.sp_slot, + SpComponent::ROT.const_as_str(), + firmware_slot, + &self.update_id, + reqwest::Body::from(self.rot_hubris_archive.clone()), + ) + .await?; + + self.progress.send_replace(Some(UpdateProgress::Started)); + + info!( + self.log, "RoT update started"; + "mgs_addr" => client.baseurl(), + ); + + Ok(()) + } + + async fn wait_for_update_completion( + &self, + mgs_clients: &mut MgsClients, + ) -> Result<(), RotUpdateError> { + // How frequently do we poll MGS for the update progress? + const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3); + + loop { + let status = mgs_clients + .poll_update_status( + self.sp_type, + self.sp_slot, + SpComponent::ROT.const_as_str(), + self.update_id, + &self.log, + ) + .await?; + + // For `Preparing` and `InProgress`, we could check the progress + // information returned by these steps and try to check that + // we're still _making_ progress, but every Nexus instance needs + // to do that anyway in case we (or the MGS instance delivering + // the update) crash, so we'll omit that check here. Instead, we + // just sleep and we'll poll again shortly. + match status { + PollUpdateStatus::Preparing { progress } => { + self.progress.send_replace(Some( + UpdateProgress::Preparing { progress }, + )); + } + PollUpdateStatus::InProgress { progress } => { + self.progress.send_replace(Some( + UpdateProgress::InProgress { progress }, + )); + } + PollUpdateStatus::Complete => { + self.progress.send_replace(Some( + UpdateProgress::InProgress { progress: Some(1.0) }, + )); + return Ok(()); + } + } + + tokio::time::sleep(STATUS_POLL_INTERVAL).await; + } + } + + async fn mark_target_slot_active_one_mgs( + &self, + client: &gateway_client::Client, + ) -> Result<(), GatewayClientError> { + // RoT currently doesn't support non-persistent slot swapping, so always + // tell it to persist our choice. 
+ let persist = true; + + let slot = self.target_rot_slot.as_u16(); + + client + .sp_component_active_slot_set( + self.sp_type, + self.sp_slot, + SpComponent::ROT.const_as_str(), + persist, + &SpComponentFirmwareSlot { slot }, + ) + .await?; + + // TODO-correctness Should we send some kind of update to + // `self.progress`? We already sent `InProgress(1.0)` when the update + // finished delivering. Or perhaps we shouldn't even be doing this step + // and the reset, and let our caller handle the finalization? + + info!( + self.log, "RoT target slot marked active"; + "mgs_addr" => client.baseurl(), + ); + + Ok(()) + } + + async fn finalize_update_via_reset_one_mgs( + &self, + client: &gateway_client::Client, + ) -> Result<(), GatewayClientError> { + client + .sp_component_reset( + self.sp_type, + self.sp_slot, + SpComponent::ROT.const_as_str(), + ) + .await?; + + self.progress.send_replace(Some(UpdateProgress::Complete)); + info!( + self.log, "RoT update complete"; + "mgs_addr" => client.baseurl(), + ); + + Ok(()) + } +} + +trait RotSlotAsU16 { + fn as_u16(&self) -> u16; +} + +impl RotSlotAsU16 for RotSlot { + fn as_u16(&self) -> u16 { + match self { + RotSlot::A => 0, + RotSlot::B => 1, + } + } +} diff --git a/nexus/src/app/update/sp_updater.rs b/nexus/src/app/update/sp_updater.rs index 9abb2ad222..419a733441 100644 --- a/nexus/src/app/update/sp_updater.rs +++ b/nexus/src/app/update/sp_updater.rs @@ -4,13 +4,15 @@ //! Module containing types for updating SPs via MGS. -use futures::Future; +use crate::app::update::mgs_clients::PollUpdateStatus; + +use super::mgs_clients::PollUpdateStatusError; +use super::MgsClients; +use super::UpdateProgress; +use super::UpdateStatusError; use gateway_client::types::SpType; -use gateway_client::types::SpUpdateStatus; use gateway_client::SpComponent; use slog::Logger; -use std::collections::VecDeque; -use std::sync::Arc; use std::time::Duration; use tokio::sync::watch; use uuid::Uuid; @@ -22,20 +24,17 @@ pub enum SpUpdateError { #[error("error communicating with MGS")] MgsCommunication(#[from] GatewayClientError), - // Error returned when we successfully start an update but it fails to - // complete successfully. - #[error("update failed to complete: {0}")] - FailedToComplete(String), + #[error("failed checking update status: {0}")] + PollUpdateStatus(#[from] UpdateStatusError), } -// TODO-cleanup Probably share this with other update implementations? -#[derive(Debug, PartialEq, Clone)] -pub enum UpdateProgress { - Started, - Preparing { progress: Option }, - InProgress { progress: Option }, - Complete, - Failed(String), +impl From for SpUpdateError { + fn from(err: PollUpdateStatusError) -> Self { + match err { + PollUpdateStatusError::StatusError(err) => err.into(), + PollUpdateStatusError::ClientError(err) => err.into(), + } + } } pub struct SpUpdater { @@ -58,6 +57,7 @@ impl SpUpdater { log: &Logger, ) -> Self { let log = log.new(slog::o!( + "component" => "SpUpdater", "sp_type" => format!("{sp_type:?}"), "sp_slot" => sp_slot, "update_id" => format!("{update_id}"), @@ -76,78 +76,38 @@ impl SpUpdater { /// multiple MGS instances are available and passed to this method and an /// error occurs communicating with one instance, `SpUpdater` will try the /// remaining instances before failing. - /// - /// # Panics - /// - /// If `mgs_clients` is empty. 
- pub async fn update>>>( + pub async fn update( self, - mgs_clients: T, + mut mgs_clients: MgsClients, ) -> Result<(), SpUpdateError> { - let mut mgs_clients = mgs_clients.into(); - assert!(!mgs_clients.is_empty()); - // The async blocks below want `&self` references, but we take `self` // for API clarity (to start a new SP update, the caller should // construct a new `SpUpdater`). Create a `&self` ref that we use // through the remainder of this method. let me = &self; - me.try_all_mgs_clients(&mut mgs_clients, |client| async move { - me.start_update_one_mgs(&client).await - }) - .await?; + mgs_clients + .try_all_serially(&self.log, |client| async move { + me.start_update_one_mgs(&client).await + }) + .await?; // `wait_for_update_completion` uses `try_all_mgs_clients` internally, // so we don't wrap it here. me.wait_for_update_completion(&mut mgs_clients).await?; - me.try_all_mgs_clients(&mut mgs_clients, |client| async move { - me.finalize_update_via_reset_one_mgs(&client).await - }) - .await?; + mgs_clients + .try_all_serially(&self.log, |client| async move { + me.finalize_update_via_reset_one_mgs(&client).await + }) + .await?; - Ok(()) - } + // wait for any progress watchers to be dropped before we return; + // otherwise, they'll get `RecvError`s when trying to check the current + // status + self.progress.closed().await; - // Helper method to run `op` against all clients. If `op` returns - // successfully for one client, that client will be rotated to the front of - // the list (so any subsequent operations can start with the first client). - async fn try_all_mgs_clients( - &self, - mgs_clients: &mut VecDeque>, - op: F, - ) -> Result - where - F: Fn(Arc) -> Fut, - Fut: Future>, - { - let mut last_err = None; - for (i, client) in mgs_clients.iter().enumerate() { - match op(Arc::clone(client)).await { - Ok(val) => { - // Shift our list of MGS clients such that the one we just - // used is at the front for subsequent requests. - mgs_clients.rotate_left(i); - return Ok(val); - } - // If we have an error communicating with an MGS instance - // (timeout, unexpected connection close, etc.), we'll move on - // and try the next MGS client. If this was the last client, - // we'll stash the error in `last_err` and return it below the - // loop. - Err(GatewayClientError::CommunicationError(err)) => { - last_err = Some(err); - continue; - } - Err(err) => return Err(err), - } - } - - // We know we have at least one `mgs_client`, so the only way to get - // here is if all clients failed with connection errors. Return the - // error from the last MGS we tried. - Err(GatewayClientError::CommunicationError(last_err.unwrap())) + Ok(()) } async fn start_update_one_mgs( @@ -183,142 +143,48 @@ impl SpUpdater { async fn wait_for_update_completion( &self, - mgs_clients: &mut VecDeque>, + mgs_clients: &mut MgsClients, ) -> Result<(), SpUpdateError> { // How frequently do we poll MGS for the update progress? const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3); loop { - let update_status = self - .try_all_mgs_clients(mgs_clients, |client| async move { - let update_status = client - .sp_component_update_status( - self.sp_type, - self.sp_slot, - SpComponent::SP_ITSELF.const_as_str(), - ) - .await?; - - info!( - self.log, "got SP update status"; - "mgs_addr" => client.baseurl(), - "status" => ?update_status, - ); - - Ok(update_status) - }) - .await? 
- .into_inner(); - - // The majority of possible update statuses indicate failure; we'll - // handle the small number of non-failure cases by either - // `continue`ing or `return`ing; all other branches will give us an - // error string that we can report. - let error_message = match update_status { - // For `Preparing` and `InProgress`, we could check the progress - // information returned by these steps and try to check that - // we're still _making_ progress, but every Nexus instance needs - // to do that anyway in case we (or the MGS instance delivering - // the update) crash, so we'll omit that check here. Instead, we - // just sleep and we'll poll again shortly. - SpUpdateStatus::Preparing { id, progress } => { - if id == self.update_id { - let progress = progress.and_then(|progress| { - if progress.current > progress.total { - warn!( - self.log, "nonsense SP preparing progress"; - "current" => progress.current, - "total" => progress.total, - ); - None - } else if progress.total == 0 { - None - } else { - Some( - f64::from(progress.current) - / f64::from(progress.total), - ) - } - }); - self.progress.send_replace(Some( - UpdateProgress::Preparing { progress }, - )); - tokio::time::sleep(STATUS_POLL_INTERVAL).await; - continue; - } else { - format!("different update is now preparing ({id})") - } - } - SpUpdateStatus::InProgress { - id, - bytes_received, - total_bytes, - } => { - if id == self.update_id { - let progress = if bytes_received > total_bytes { - warn!( - self.log, "nonsense SP progress"; - "bytes_received" => bytes_received, - "total_bytes" => total_bytes, - ); - None - } else if total_bytes == 0 { - None - } else { - Some( - f64::from(bytes_received) - / f64::from(total_bytes), - ) - }; - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress }, - )); - tokio::time::sleep(STATUS_POLL_INTERVAL).await; - continue; - } else { - format!("different update is now in progress ({id})") - } - } - SpUpdateStatus::Complete { id } => { - if id == self.update_id { - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress: Some(1.0) }, - )); - return Ok(()); - } else { - format!("different update is now in complete ({id})") - } + let status = mgs_clients + .poll_update_status( + self.sp_type, + self.sp_slot, + SpComponent::SP_ITSELF.const_as_str(), + self.update_id, + &self.log, + ) + .await?; + + // For `Preparing` and `InProgress`, we could check the progress + // information returned by these steps and try to check that + // we're still _making_ progress, but every Nexus instance needs + // to do that anyway in case we (or the MGS instance delivering + // the update) crash, so we'll omit that check here. Instead, we + // just sleep and we'll poll again shortly. 
+ match status { + PollUpdateStatus::Preparing { progress } => { + self.progress.send_replace(Some( + UpdateProgress::Preparing { progress }, + )); } - SpUpdateStatus::None => { - "update status lost (did the SP reset?)".to_string() + PollUpdateStatus::InProgress { progress } => { + self.progress.send_replace(Some( + UpdateProgress::InProgress { progress }, + )); } - SpUpdateStatus::Aborted { id } => { - if id == self.update_id { - "update was aborted".to_string() - } else { - format!("different update is now in complete ({id})") - } + PollUpdateStatus::Complete => { + self.progress.send_replace(Some( + UpdateProgress::InProgress { progress: Some(1.0) }, + )); + return Ok(()); } - SpUpdateStatus::Failed { code, id } => { - if id == self.update_id { - format!("update failed (error code {code})") - } else { - format!("different update failed ({id})") - } - } - SpUpdateStatus::RotError { id, message } => { - if id == self.update_id { - format!("update failed (rot error: {message})") - } else { - format!("different update failed with rot error ({id})") - } - } - }; + } - self.progress.send_replace(Some(UpdateProgress::Failed( - error_message.clone(), - ))); - return Err(SpUpdateError::FailedToComplete(error_message)); + tokio::time::sleep(STATUS_POLL_INTERVAL).await; } } diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index e0bb09de4f..87c5c74f0f 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -25,6 +25,7 @@ mod projects; mod rack; mod role_assignments; mod roles_builtin; +mod rot_updater; mod router_routes; mod saml; mod schema; diff --git a/nexus/tests/integration_tests/rot_updater.rs b/nexus/tests/integration_tests/rot_updater.rs new file mode 100644 index 0000000000..750f9571d0 --- /dev/null +++ b/nexus/tests/integration_tests/rot_updater.rs @@ -0,0 +1,627 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests `RotUpdater`'s delivery of updates to RoTs via MGS + +use gateway_client::types::{RotSlot, SpType}; +use gateway_messages::{SpPort, UpdateInProgressStatus, UpdateStatus}; +use gateway_test_utils::setup as mgs_setup; +use hubtools::RawHubrisArchive; +use hubtools::{CabooseBuilder, HubrisArchiveBuilder}; +use omicron_nexus::app::test_interfaces::{ + MgsClients, RotUpdater, UpdateProgress, +}; +use sp_sim::SimulatedSp; +use sp_sim::SIM_ROT_BOARD; +use std::mem; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tokio::io::AsyncWriteExt; +use tokio::net::TcpListener; +use tokio::net::TcpStream; +use tokio::sync::mpsc; +use uuid::Uuid; + +fn make_fake_rot_image() -> Vec { + let caboose = CabooseBuilder::default() + .git_commit("fake-git-commit") + .board(SIM_ROT_BOARD) + .version("0.0.0") + .name("fake-name") + .build(); + + let mut builder = HubrisArchiveBuilder::with_fake_image(); + builder.write_caboose(caboose.as_slice()).unwrap(); + builder.build_to_vec().unwrap() +} + +#[tokio::test] +async fn test_rot_updater_updates_sled() { + // Start MGS + Sim SP. + let mgstestctx = + mgs_setup::test_setup("test_rot_updater_updates_sled", SpPort::One) + .await; + + // Configure an MGS client. 
+ let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); + + // Configure and instantiate an `RotUpdater`. + let sp_type = SpType::Sled; + let sp_slot = 0; + let update_id = Uuid::new_v4(); + let hubris_archive = make_fake_rot_image(); + let target_rot_slot = RotSlot::B; + + let rot_updater = RotUpdater::new( + sp_type, + sp_slot, + target_rot_slot, + update_id, + hubris_archive.clone(), + &mgstestctx.logctx.log, + ); + + // Run the update. + rot_updater.update(mgs_clients).await.expect("update failed"); + + // Ensure the RoT received the complete update. + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_rot_update_data() + .await + .expect("simulated RoT did not receive an update"); + + let hubris_archive = RawHubrisArchive::from_vec(hubris_archive).unwrap(); + + assert_eq!( + hubris_archive.image.data.as_slice(), + &*last_update_image, + "simulated RoT update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + hubris_archive.image.data.len() + ); + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_rot_updater_updates_switch() { + // Start MGS + Sim SP. + let mgstestctx = + mgs_setup::test_setup("test_rot_updater_updates_switch", SpPort::One) + .await; + + // Configure an MGS client. + let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); + + let sp_type = SpType::Switch; + let sp_slot = 0; + let update_id = Uuid::new_v4(); + let hubris_archive = make_fake_rot_image(); + let target_rot_slot = RotSlot::B; + + let rot_updater = RotUpdater::new( + sp_type, + sp_slot, + target_rot_slot, + update_id, + hubris_archive.clone(), + &mgstestctx.logctx.log, + ); + + rot_updater.update(mgs_clients).await.expect("update failed"); + + let last_update_image = mgstestctx.simrack.sidecars[sp_slot as usize] + .last_rot_update_data() + .await + .expect("simulated RoT did not receive an update"); + + let hubris_archive = RawHubrisArchive::from_vec(hubris_archive).unwrap(); + + assert_eq!( + hubris_archive.image.data.as_slice(), + &*last_update_image, + "simulated RoT update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + hubris_archive.image.data.len() + ); + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_rot_updater_remembers_successful_mgs_instance() { + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_rot_updater_remembers_successful_mgs_instance", + SpPort::One, + ) + .await; + + // Also start a local TCP server that we will claim is an MGS instance, but + // it will close connections immediately after accepting them. This will + // allow us to count how many connections it receives, while simultaneously + // causing errors in the RotUpdater when it attempts to use this "MGS". 
+ let (failing_mgs_task, failing_mgs_addr, failing_mgs_conn_counter) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let conn_count = Arc::new(AtomicUsize::new(0)); + + let task = { + let conn_count = Arc::clone(&conn_count); + tokio::spawn(async move { + loop { + let (mut stream, _peer) = socket.accept().await.unwrap(); + conn_count.fetch_add(1, Ordering::SeqCst); + stream.shutdown().await.unwrap(); + } + }) + }; + + (task, addr, conn_count) + }; + + // Order the MGS clients such that the bogus MGS that immediately closes + // connections comes first. `RotUpdater` should remember that the second MGS + // instance succeeds, and only send subsequent requests to it: we should + // only see a single attempted connection to the bogus MGS, even though + // delivering an update requires a bare minimum of three requests (start the + // update, query the status, reset the RoT) and often more (if repeated + // queries are required to wait for completion). + let mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new( + &format!("http://{failing_mgs_addr}"), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), + ), + gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + ), + ]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let update_id = Uuid::new_v4(); + let hubris_archive = make_fake_rot_image(); + let target_rot_slot = RotSlot::B; + + let rot_updater = RotUpdater::new( + sp_type, + sp_slot, + target_rot_slot, + update_id, + hubris_archive.clone(), + &mgstestctx.logctx.log, + ); + + rot_updater.update(mgs_clients).await.expect("update failed"); + + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_rot_update_data() + .await + .expect("simulated RoT did not receive an update"); + + let hubris_archive = RawHubrisArchive::from_vec(hubris_archive).unwrap(); + + assert_eq!( + hubris_archive.image.data.as_slice(), + &*last_update_image, + "simulated RoT update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + hubris_archive.image.data.len() + ); + + // Check that our bogus MGS only received a single connection attempt. + // (After RotUpdater failed to talk to this instance, it should have fallen + // back to the valid one for all further requests.) + assert_eq!( + failing_mgs_conn_counter.load(Ordering::SeqCst), + 1, + "bogus MGS instance didn't receive the expected number of connections" + ); + failing_mgs_task.abort(); + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_rot_updater_switches_mgs_instances_on_failure() { + enum MgsProxy { + One(TcpStream), + Two(TcpStream), + } + + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_rot_updater_switches_mgs_instances_on_failure", + SpPort::One, + ) + .await; + let mgs_bind_addr = mgstestctx.client.bind_address; + + let spawn_mgs_proxy_task = |mut stream: TcpStream| { + tokio::spawn(async move { + let mut mgs_stream = TcpStream::connect(mgs_bind_addr) + .await + .expect("failed to connect to MGS"); + tokio::io::copy_bidirectional(&mut stream, &mut mgs_stream) + .await + .expect("failed to proxy connection to MGS"); + }) + }; + + // Start two MGS proxy tasks; when each receives an incoming TCP connection, + // it forwards that `TcpStream` along the `mgs_proxy_connections` channel + // along with a tag of which proxy it is. 
We'll use this below to flip flop + // between MGS "instances" (really these two proxies). + let (mgs_proxy_connections_tx, mut mgs_proxy_connections_rx) = + mpsc::unbounded_channel(); + let (mgs_proxy_one_task, mgs_proxy_one_addr) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let mgs_proxy_connections_tx = mgs_proxy_connections_tx.clone(); + let task = tokio::spawn(async move { + loop { + let (stream, _peer) = socket.accept().await.unwrap(); + mgs_proxy_connections_tx.send(MgsProxy::One(stream)).unwrap(); + } + }); + (task, addr) + }; + let (mgs_proxy_two_task, mgs_proxy_two_addr) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let task = tokio::spawn(async move { + loop { + let (stream, _peer) = socket.accept().await.unwrap(); + mgs_proxy_connections_tx.send(MgsProxy::Two(stream)).unwrap(); + } + }); + (task, addr) + }; + + // Disable connection pooling so each request gets a new TCP connection. + let client = + reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); + + // Configure two MGS clients pointed at our two proxy tasks. + let mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new_with_client( + &format!("http://{mgs_proxy_one_addr}"), + client.clone(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), + ), + gateway_client::Client::new_with_client( + &format!("http://{mgs_proxy_two_addr}"), + client, + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient2")), + ), + ]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let update_id = Uuid::new_v4(); + let hubris_archive = make_fake_rot_image(); + let target_rot_slot = RotSlot::B; + + let rot_updater = RotUpdater::new( + sp_type, + sp_slot, + target_rot_slot, + update_id, + hubris_archive.clone(), + &mgstestctx.logctx.log, + ); + + // Spawn the actual update task. + let mut update_task = tokio::spawn(rot_updater.update(mgs_clients)); + + // Loop over incoming requests. We expect this sequence: + // + // 1. Connection arrives on the first proxy + // 2. We spawn a task to service that request, and set `should_swap` + // 3. Connection arrives on the first proxy + // 4. We drop that connection, flip `expected_proxy`, and clear + // `should_swap` + // 5. Connection arrives on the second proxy + // 6. We spawn a task to service that request, and set `should_swap` + // 7. Connection arrives on the second proxy + // 8. We drop that connection, flip `expected_proxy`, and clear + // `should_swap` + // + // ... repeat until the update is complete. + let mut expected_proxy = 0; + let mut proxy_one_count = 0; + let mut proxy_two_count = 0; + let mut total_requests_handled = 0; + let mut should_swap = false; + loop { + tokio::select! { + Some(proxy_stream) = mgs_proxy_connections_rx.recv() => { + let stream = match proxy_stream { + MgsProxy::One(stream) => { + assert_eq!(expected_proxy, 0); + proxy_one_count += 1; + stream + } + MgsProxy::Two(stream) => { + assert_eq!(expected_proxy, 1); + proxy_two_count += 1; + stream + } + }; + + // Should we trigger `RotUpdater` to swap to the other MGS + // (proxy)? If so, do that by dropping this connection (which + // will cause a client failure) and note that we expect the next + // incoming request to come on the other proxy. + if should_swap { + mem::drop(stream); + expected_proxy ^= 1; + should_swap = false; + } else { + // Otherwise, handle this connection. 
+ total_requests_handled += 1; + spawn_mgs_proxy_task(stream); + should_swap = true; + } + } + + result = &mut update_task => { + match result { + Ok(Ok(())) => { + mgs_proxy_one_task.abort(); + mgs_proxy_two_task.abort(); + break; + } + Ok(Err(err)) => panic!("update failed: {err}"), + Err(err) => panic!("update task panicked: {err}"), + } + } + } + } + + // An RoT update requires a minimum of 4 requests to MGS: post the update, + // check the status, post to mark the new target slot active, and post an + // RoT reset. There may be more requests if the update is not yet complete + // when the status is checked, but we can just check that each of our + // proxies received at least 2 incoming requests; based on our outline + // above, if we got the minimum of 4 requests, it would look like this: + // + // 1. POST update -> first proxy (success) + // 2. GET status -> first proxy (fail) + // 3. GET status retry -> second proxy (success) + // 4. POST new target slot -> second proxy (fail) + // 5. POST new target slot -> first proxy (success) + // 6. POST reset -> first proxy (fail) + // 7. POST reset -> second proxy (success) + // + // This pattern would repeat if multiple status requests were required, so + // we always expect the first proxy to see exactly one more connection + // attempt than the second (because it went first before they started + // swapping), and the two together should see a total of one less than + // double the number of successful requests required. + assert!(total_requests_handled >= 3); + assert_eq!(proxy_one_count, proxy_two_count + 1); + assert_eq!( + (proxy_one_count + proxy_two_count + 1) / 2, + total_requests_handled + ); + + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_rot_update_data() + .await + .expect("simulated RoT did not receive an update"); + + let hubris_archive = RawHubrisArchive::from_vec(hubris_archive).unwrap(); + + assert_eq!( + hubris_archive.image.data.as_slice(), + &*last_update_image, + "simulated RoT update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + hubris_archive.image.data.len() + ); + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_rot_updater_delivers_progress() { + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_rot_updater_delivers_progress", + SpPort::One, + ) + .await; + + // Configure an MGS client. + let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let update_id = Uuid::new_v4(); + let hubris_archive = make_fake_rot_image(); + let target_rot_slot = RotSlot::B; + + let rot_updater = RotUpdater::new( + sp_type, + sp_slot, + target_rot_slot, + update_id, + hubris_archive.clone(), + &mgstestctx.logctx.log, + ); + + let hubris_archive = RawHubrisArchive::from_vec(hubris_archive).unwrap(); + let rot_image_len = hubris_archive.image.data.len() as u32; + + // Subscribe to update progress, and check that there is no status yet; we + // haven't started the update. + let mut progress = rot_updater.progress_watcher(); + assert_eq!(*progress.borrow_and_update(), None); + + // Install a semaphore on the requests our target SP will receive so we can + // inspect progress messages without racing. 
+ let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize];
+ let sp_accept_sema = target_sp.install_udp_accept_semaphore().await;
+ let mut sp_responses = target_sp.responses_sent_count().unwrap();
+
+ // Spawn the update on a background task so we can watch `progress` as it is
+ // applied.
+ let do_update_task = tokio::spawn(rot_updater.update(mgs_clients));
+
+ // Allow the SP to respond to 1 message: the "prepare update" message that
+ // triggers the start of an update, then ensure we see the "started"
+ // progress.
+ sp_accept_sema.send(1).unwrap();
+ progress.changed().await.unwrap();
+ assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Started));
+
+ // Ensure our simulated SP is in the state we expect: it's prepared for an
+ // update but has not yet received any data.
+ assert_eq!(
+ target_sp.current_update_status().await,
+ UpdateStatus::InProgress(UpdateInProgressStatus {
+ id: update_id.into(),
+ bytes_received: 0,
+ total_size: rot_image_len,
+ })
+ );
+
+ // Record the number of responses the SP has sent; we'll use
+ // `sp_responses.changed()` in the loop below, and want to mark whatever
+ // value this watch channel currently has as seen.
+ sp_responses.borrow_and_update();
+
+ // At this point, there are two clients racing each other to talk to our
+ // simulated SP:
+ //
+ // 1. MGS is trying to deliver the update
+ // 2. `rot_updater` is trying to poll (via MGS) for update status
+ //
+ // and we want to ensure that we see any relevant progress reports from
+ // `rot_updater`. We'll let one MGS -> SP message through at a time (waiting
+ // until our SP has responded by waiting for a change to `sp_responses`)
+ // then check its update state: if it changed, the packet we let through was
+ // data from MGS; otherwise, it was a status request from `rot_updater`.
+ //
+ // This loop will continue until either:
+ //
+ // 1. We see an `UpdateStatus::InProgress` message indicating 100% delivery,
+ // at which point we break out of the loop
+ // 2. We time out waiting for the previous step (by timing out for either
+ // the SP to process a request or `rot_updater` to realize there's been
+ // progress), at which point we panic and fail this test.
+ let mut prev_bytes_received = 0;
+ let mut expect_progress_change = false;
+ loop {
+ // Allow the SP to accept and respond to a single UDP packet.
+ sp_accept_sema.send(1).unwrap();
+
+ // Wait until the SP has sent a response, with a safety rail that we
+ // haven't screwed up our untangle-the-race logic: if we don't see the
+ // SP process any new messages after several seconds, our test is
+ // broken, so fail.
+ tokio::time::timeout(Duration::from_secs(10), sp_responses.changed())
+ .await
+ .expect("timeout waiting for SP response count to change")
+ .expect("sp response count sender dropped");
+
+ // Inspect the SP's in-memory update state; we expect only `InProgress`
+ // or `Complete`, and in either case we note whether we expect to see
+ // status changes from `rot_updater`.
+ match target_sp.current_update_status().await {
+ UpdateStatus::InProgress(rot_progress) => {
+ if rot_progress.bytes_received > prev_bytes_received {
+ prev_bytes_received = rot_progress.bytes_received;
+ expect_progress_change = true;
+ continue;
+ }
+ }
+ UpdateStatus::Complete(_) => {
+ if prev_bytes_received < rot_image_len {
+ prev_bytes_received = rot_image_len;
+ continue;
+ }
+ }
+ status @ (UpdateStatus::None
+ | UpdateStatus::Preparing(_)
+ | UpdateStatus::SpUpdateAuxFlashChckScan { ..
}
+ | UpdateStatus::Aborted(_)
+ | UpdateStatus::Failed { .. }
+ | UpdateStatus::RotError { .. }) => {
+ panic!("unexpected status {status:?}");
+ }
+ }
+
+ // If we get here, the most recent packet did _not_ change the SP's
+ // internal update state, so it was a status request from `rot_updater`.
+ // If we expect the updater to see new progress, wait for that change
+ // here.
+ if expect_progress_change || prev_bytes_received == rot_image_len {
+ // Safety rail that we haven't screwed up our untangle-the-race
+ // logic: if we don't see any new progress after several seconds, our
+ // test is broken, so fail.
+ tokio::time::timeout(Duration::from_secs(10), progress.changed())
+ .await
+ .expect("progress timeout")
+ .expect("progress watch sender dropped");
+ let status = progress.borrow_and_update().clone().unwrap();
+ expect_progress_change = false;
+
+ // We're done if we've observed the final progress message.
+ if let UpdateProgress::InProgress { progress: Some(value) } = status
+ {
+ if value == 1.0 {
+ break;
+ }
+ } else {
+ panic!("unexpected progress status {status:?}");
+ }
+ }
+ }
+
+ // The update has been fully delivered to the SP, but we don't see an
+ // `UpdateStatus::Complete` message until the RoT is reset. Release the SP
+ // semaphore since we're no longer racing to observe intermediate progress,
+ // and wait for the completion message.
+ sp_accept_sema.send(usize::MAX).unwrap();
+ progress.changed().await.unwrap();
+ assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Complete));
+
+ // drop our progress receiver so `do_update_task` can complete
+ mem::drop(progress);
+
+ do_update_task.await.expect("update task panicked").expect("update failed");
+
+ let last_update_image = target_sp
+ .last_rot_update_data()
+ .await
+ .expect("simulated RoT did not receive an update");
+
+ assert_eq!(
+ hubris_archive.image.data.as_slice(),
+ &*last_update_image,
+ "simulated RoT update contents (len {}) \
+ do not match test generated fake image (len {})",
+ last_update_image.len(),
+ hubris_archive.image.data.len()
+ );
+
+ mgstestctx.teardown().await;
+}
diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs
index 351c28ad9c..89735ac3d9 100644
--- a/nexus/tests/integration_tests/sp_updater.rs
+++ b/nexus/tests/integration_tests/sp_updater.rs
@@ -9,7 +9,9 @@ use gateway_messages::{SpPort, UpdateInProgressStatus, UpdateStatus};
 use gateway_test_utils::setup as mgs_setup;
 use hubtools::RawHubrisArchive;
 use hubtools::{CabooseBuilder, HubrisArchiveBuilder};
-use omicron_nexus::app::test_interfaces::{SpUpdater, UpdateProgress};
+use omicron_nexus::app::test_interfaces::{
+ MgsClients, SpUpdater, UpdateProgress,
+};
 use sp_sim::SimulatedSp;
 use sp_sim::SIM_GIMLET_BOARD;
 use sp_sim::SIM_SIDECAR_BOARD;
@@ -44,10 +46,10 @@ async fn test_sp_updater_updates_sled() {
 .await;

 // Configure an MGS client.
- let mgs_client = Arc::new(gateway_client::Client::new(
+ let mgs_clients = MgsClients::from_clients([gateway_client::Client::new(
 &mgstestctx.client.url("/").to_string(),
 mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")),
- ));
+ )]);

 // Configure and instantiate an `SpUpdater`.
 let sp_type = SpType::Sled;
@@ -64,11 +66,11 @@ async fn test_sp_updater_updates_sled() {
 );

 // Run the update.
- sp_updater.update([mgs_client]).await.expect("update failed");
+ sp_updater.update(mgs_clients).await.expect("update failed");

 // Ensure the SP received the complete update.
let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] - .last_update_data() + .last_sp_update_data() .await .expect("simulated SP did not receive an update"); @@ -94,10 +96,10 @@ async fn test_sp_updater_updates_switch() { .await; // Configure an MGS client. - let mgs_client = Arc::new(gateway_client::Client::new( + let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( &mgstestctx.client.url("/").to_string(), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )); + )]); let sp_type = SpType::Switch; let sp_slot = 0; @@ -112,10 +114,10 @@ async fn test_sp_updater_updates_switch() { &mgstestctx.logctx.log, ); - sp_updater.update([mgs_client]).await.expect("update failed"); + sp_updater.update(mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.sidecars[sp_slot as usize] - .last_update_data() + .last_sp_update_data() .await .expect("simulated SP did not receive an update"); @@ -172,16 +174,16 @@ async fn test_sp_updater_remembers_successful_mgs_instance() { // delivering an update requires a bare minimum of three requests (start the // update, query the status, reset the SP) and often more (if repeated // queries are required to wait for completion). - let mgs_clients = [ - Arc::new(gateway_client::Client::new( + let mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new( &format!("http://{failing_mgs_addr}"), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), - )), - Arc::new(gateway_client::Client::new( + ), + gateway_client::Client::new( &mgstestctx.client.url("/").to_string(), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )), - ]; + ), + ]); let sp_type = SpType::Sled; let sp_slot = 0; @@ -199,7 +201,7 @@ async fn test_sp_updater_remembers_successful_mgs_instance() { sp_updater.update(mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] - .last_update_data() + .last_sp_update_data() .await .expect("simulated SP did not receive an update"); @@ -288,18 +290,18 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() { reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); // Configure two MGS clients pointed at our two proxy tasks. - let mgs_clients = [ - Arc::new(gateway_client::Client::new_with_client( + let mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new_with_client( &format!("http://{mgs_proxy_one_addr}"), client.clone(), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), - )), - Arc::new(gateway_client::Client::new_with_client( + ), + gateway_client::Client::new_with_client( &format!("http://{mgs_proxy_two_addr}"), client, mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient2")), - )), - ]; + ), + ]); let sp_type = SpType::Sled; let sp_slot = 0; @@ -408,7 +410,7 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() { ); let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] - .last_update_data() + .last_sp_update_data() .await .expect("simulated SP did not receive an update"); @@ -434,10 +436,10 @@ async fn test_sp_updater_delivers_progress() { .await; // Configure an MGS client. 
- let mgs_client = Arc::new(gateway_client::Client::new( + let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( &mgstestctx.client.url("/").to_string(), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )); + )]); let sp_type = SpType::Sled; let sp_slot = 0; @@ -468,7 +470,7 @@ async fn test_sp_updater_delivers_progress() { // Spawn the update on a background task so we can watch `progress` as it is // applied. - let do_update_task = tokio::spawn(sp_updater.update([mgs_client])); + let do_update_task = tokio::spawn(sp_updater.update(mgs_clients)); // Allow the SP to respond to 2 messages: the caboose check and the "prepare // update" messages that trigger the start of an update, then ensure we see @@ -589,10 +591,13 @@ async fn test_sp_updater_delivers_progress() { progress.changed().await.unwrap(); assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Complete)); + // drop our progress receiver so `do_update_task` can complete + mem::drop(progress); + do_update_task.await.expect("update task panicked").expect("update failed"); let last_update_image = target_sp - .last_update_data() + .last_sp_update_data() .await .expect("simulated SP did not receive an update"); diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index be8d903d3f..0c753b62b5 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -14,6 +14,7 @@ use crate::server::UdpServer; use crate::update::SimSpUpdate; use crate::Responsiveness; use crate::SimulatedSp; +use crate::SIM_ROT_BOARD; use anyhow::{anyhow, bail, Context, Result}; use async_trait::async_trait; use futures::future; @@ -107,10 +108,16 @@ impl SimulatedSp for Gimlet { self.rot.lock().unwrap().handle_deserialized(request) } - async fn last_update_data(&self) -> Option> { + async fn last_sp_update_data(&self) -> Option> { let handler = self.handler.as_ref()?; let handler = handler.lock().await; - handler.update_state.last_update_data() + handler.update_state.last_sp_update_data() + } + + async fn last_rot_update_data(&self) -> Option> { + let handler = self.handler.as_ref()?; + let handler = handler.lock().await; + handler.update_state.last_rot_update_data() } async fn current_update_status(&self) -> gateway_messages::UpdateStatus { @@ -573,7 +580,7 @@ struct Handler { power_state: PowerState, startup_options: StartupOptions, update_state: SimSpUpdate, - reset_pending: bool, + reset_pending: Option, // To simulate an SP reset, we should (after doing whatever housekeeping we // need to track the reset) intentionally _fail_ to respond to the request, @@ -615,7 +622,7 @@ impl Handler { power_state: PowerState::A2, startup_options: StartupOptions::empty(), update_state: SimSpUpdate::default(), - reset_pending: false, + reset_pending: None, should_fail_to_respond_signal: None, } } @@ -1065,8 +1072,9 @@ impl SpHandler for Handler { "port" => ?port, "component" => ?component, ); - if component == SpComponent::SP_ITSELF { - self.reset_pending = true; + if component == SpComponent::SP_ITSELF || component == SpComponent::ROT + { + self.reset_pending = Some(component); Ok(()) } else { Err(SpError::RequestUnsupportedForComponent) @@ -1086,9 +1094,9 @@ impl SpHandler for Handler { "component" => ?component, ); if component == SpComponent::SP_ITSELF { - if self.reset_pending { + if self.reset_pending == Some(SpComponent::SP_ITSELF) { self.update_state.sp_reset(); - self.reset_pending = false; + self.reset_pending = None; if let Some(signal) = self.should_fail_to_respond_signal.take() { // Instruct 
`server::handle_request()` to _not_ respond to @@ -1099,6 +1107,14 @@ impl SpHandler for Handler { } else { Err(SpError::ResetComponentTriggerWithoutPrepare) } + } else if component == SpComponent::ROT { + if self.reset_pending == Some(SpComponent::ROT) { + self.update_state.rot_reset(); + self.reset_pending = None; + Ok(()) + } else { + Err(SpError::ResetComponentTriggerWithoutPrepare) + } } else { Err(SpError::RequestUnsupportedForComponent) } @@ -1322,7 +1338,7 @@ impl SpHandler for Handler { static SP_VERS: &[u8] = b"0.0.1"; static ROT_GITC: &[u8] = b"eeeeeeee"; - static ROT_BORD: &[u8] = b"SimGimletRot"; + static ROT_BORD: &[u8] = SIM_ROT_BOARD.as_bytes(); static ROT_NAME: &[u8] = b"SimGimlet"; static ROT_VERS: &[u8] = b"0.0.1"; diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 668c7c3311..0958e8a177 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -28,6 +28,8 @@ use std::net::SocketAddrV6; use tokio::sync::mpsc; use tokio::sync::watch; +pub const SIM_ROT_BOARD: &str = "SimRot"; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Responsiveness { Responsive, @@ -58,8 +60,13 @@ pub trait SimulatedSp { /// Get the last completed update delivered to this simulated SP. /// - /// Only returns data after a simulated reset. - async fn last_update_data(&self) -> Option>; + /// Only returns data after a simulated reset of the SP. + async fn last_sp_update_data(&self) -> Option>; + + /// Get the last completed update delivered to this simulated RoT. + /// + /// Only returns data after a simulated reset of the RoT. + async fn last_rot_update_data(&self) -> Option>; /// Get the current update status, just as would be returned by an MGS /// request to get the update status. diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index c8fb4c5481..46fe8b5df7 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -16,6 +16,7 @@ use crate::server::UdpServer; use crate::update::SimSpUpdate; use crate::Responsiveness; use crate::SimulatedSp; +use crate::SIM_ROT_BOARD; use anyhow::Result; use async_trait::async_trait; use futures::future; @@ -118,10 +119,16 @@ impl SimulatedSp for Sidecar { self.rot.lock().unwrap().handle_deserialized(request) } - async fn last_update_data(&self) -> Option> { + async fn last_sp_update_data(&self) -> Option> { let handler = self.handler.as_ref()?; let handler = handler.lock().await; - handler.update_state.last_update_data() + handler.update_state.last_sp_update_data() + } + + async fn last_rot_update_data(&self) -> Option> { + let handler = self.handler.as_ref()?; + let handler = handler.lock().await; + handler.update_state.last_rot_update_data() } async fn current_update_status(&self) -> gateway_messages::UpdateStatus { @@ -380,7 +387,7 @@ struct Handler { power_state: PowerState, update_state: SimSpUpdate, - reset_pending: bool, + reset_pending: Option, // To simulate an SP reset, we should (after doing whatever housekeeping we // need to track the reset) intentionally _fail_ to respond to the request, @@ -419,7 +426,7 @@ impl Handler { rot_active_slot: RotSlotId::A, power_state: PowerState::A2, update_state: SimSpUpdate::default(), - reset_pending: false, + reset_pending: None, should_fail_to_respond_signal: None, } } @@ -846,8 +853,9 @@ impl SpHandler for Handler { "port" => ?port, "component" => ?component, ); - if component == SpComponent::SP_ITSELF { - self.reset_pending = true; + if component == SpComponent::SP_ITSELF || component == SpComponent::ROT + { + self.reset_pending = Some(component); Ok(()) } else { 
Err(SpError::RequestUnsupportedForComponent) @@ -867,9 +875,9 @@ impl SpHandler for Handler { "component" => ?component, ); if component == SpComponent::SP_ITSELF { - if self.reset_pending { + if self.reset_pending == Some(SpComponent::SP_ITSELF) { self.update_state.sp_reset(); - self.reset_pending = false; + self.reset_pending = None; if let Some(signal) = self.should_fail_to_respond_signal.take() { // Instruct `server::handle_request()` to _not_ respond to @@ -880,6 +888,14 @@ impl SpHandler for Handler { } else { Err(SpError::ResetComponentTriggerWithoutPrepare) } + } else if component == SpComponent::ROT { + if self.reset_pending == Some(SpComponent::ROT) { + self.update_state.rot_reset(); + self.reset_pending = None; + Ok(()) + } else { + Err(SpError::ResetComponentTriggerWithoutPrepare) + } } else { Err(SpError::RequestUnsupportedForComponent) } @@ -1101,7 +1117,7 @@ impl SpHandler for Handler { static SP_VERS: &[u8] = b"0.0.1"; static ROT_GITC: &[u8] = b"eeeeeeee"; - static ROT_BORD: &[u8] = b"SimSidecarRot"; + static ROT_BORD: &[u8] = SIM_ROT_BOARD.as_bytes(); static ROT_NAME: &[u8] = b"SimSidecar"; static ROT_VERS: &[u8] = b"0.0.1"; diff --git a/sp-sim/src/update.rs b/sp-sim/src/update.rs index e57659ca1a..9879a3ecde 100644 --- a/sp-sim/src/update.rs +++ b/sp-sim/src/update.rs @@ -13,12 +13,17 @@ use gateway_messages::UpdateInProgressStatus; pub(crate) struct SimSpUpdate { state: UpdateState, - last_update_data: Option>, + last_sp_update_data: Option>, + last_rot_update_data: Option>, } impl Default for SimSpUpdate { fn default() -> Self { - Self { state: UpdateState::NotPrepared, last_update_data: None } + Self { + state: UpdateState::NotPrepared, + last_sp_update_data: None, + last_rot_update_data: None, + } } } @@ -80,6 +85,7 @@ impl SimSpUpdate { let mut stolen = Cursor::new(Box::default()); mem::swap(data, &mut stolen); self.state = UpdateState::Completed { + component: *component, id: *id, data: stolen.into_inner(), }; @@ -112,16 +118,37 @@ impl SimSpUpdate { } pub(crate) fn sp_reset(&mut self) { - self.last_update_data = match &self.state { - UpdateState::Completed { data, .. } => Some(data.clone()), + match &self.state { + UpdateState::Completed { data, component, .. } => { + if *component == SpComponent::SP_ITSELF { + self.last_sp_update_data = Some(data.clone()); + } + } + UpdateState::NotPrepared + | UpdateState::Prepared { .. } + | UpdateState::Aborted(_) => (), + } + } + + pub(crate) fn rot_reset(&mut self) { + match &self.state { + UpdateState::Completed { data, component, .. } => { + if *component == SpComponent::ROT { + self.last_rot_update_data = Some(data.clone()); + } + } UpdateState::NotPrepared | UpdateState::Prepared { .. 
} - | UpdateState::Aborted(_) => None, - }; + | UpdateState::Aborted(_) => (), + } + } + + pub(crate) fn last_sp_update_data(&self) -> Option> { + self.last_sp_update_data.clone() } - pub(crate) fn last_update_data(&self) -> Option> { - self.last_update_data.clone() + pub(crate) fn last_rot_update_data(&self) -> Option> { + self.last_rot_update_data.clone() } } @@ -138,6 +165,7 @@ enum UpdateState { }, Aborted(UpdateId), Completed { + component: SpComponent, id: UpdateId, data: Box<[u8]>, }, diff --git a/wicket-common/src/update_events.rs b/wicket-common/src/update_events.rs index e0f9d4b228..fe92887646 100644 --- a/wicket-common/src/update_events.rs +++ b/wicket-common/src/update_events.rs @@ -10,6 +10,7 @@ use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use std::fmt; +use std::sync::Arc; use thiserror::Error; use update_engine::errors::NestedEngineError; use update_engine::StepSpec; @@ -197,12 +198,13 @@ pub enum UpdateTerminalError { #[source] error: gateway_client::Error, }, - #[error("failed to upload trampoline phase 2 to MGS (was a new TUF repo uploaded?)")] - // Currently, the only way this error variant can be produced is if the - // upload task died or was replaced because a new TUF repository was - // uploaded. In the future, we may want to produce errors here if the upload - // to MGS fails too many times, for example. - TrampolinePhase2UploadFailed, + #[error("uploading trampoline phase 2 to MGS failed")] + TrampolinePhase2UploadFailed { + #[source] + error: Arc, + }, + #[error("uploading trampoline phase 2 to MGS cancelled (was a new TUF repo uploaded?)")] + TrampolinePhase2UploadCancelled, #[error("downloading installinator failed")] DownloadingInstallinatorFailed { #[source] diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index f4b5db2476..a86ea35cc3 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -41,7 +41,6 @@ use installinator_common::InstallinatorSpec; use installinator_common::M2Slot; use installinator_common::WriteOutput; use omicron_common::api::external::SemverVersion; -use omicron_common::backoff; use omicron_common::update::ArtifactHash; use slog::error; use slog::info; @@ -103,12 +102,22 @@ struct SpUpdateData { } #[derive(Debug)] -struct UploadTrampolinePhase2ToMgsStatus { - hash: ArtifactHash, - // The upload task retries forever until it succeeds, so we don't need to - // keep a "tried but failed" variant here; we just need to know the ID of - // the uploaded image once it's done. - uploaded_image_id: Option, +enum UploadTrampolinePhase2ToMgsStatus { + Running { hash: ArtifactHash }, + Done { hash: ArtifactHash, uploaded_image_id: HostPhase2RecoveryImageId }, + Failed(Arc), +} + +impl UploadTrampolinePhase2ToMgsStatus { + fn hash(&self) -> Option { + match self { + UploadTrampolinePhase2ToMgsStatus::Running { hash } + | UploadTrampolinePhase2ToMgsStatus::Done { hash, .. } => { + Some(*hash) + } + UploadTrampolinePhase2ToMgsStatus::Failed(_) => None, + } + } } #[derive(Debug)] @@ -308,9 +317,8 @@ impl UpdateTracker { ) -> UploadTrampolinePhase2ToMgs { let artifact = plan.trampoline_phase_2.clone(); let (status_tx, status_rx) = - watch::channel(UploadTrampolinePhase2ToMgsStatus { + watch::channel(UploadTrampolinePhase2ToMgsStatus::Running { hash: artifact.data.hash(), - uploaded_image_id: None, }); let task = tokio::spawn(upload_trampoline_phase_2_to_mgs( self.mgs_client.clone(), @@ -426,8 +434,8 @@ impl<'tr> SpawnUpdateDriver for RealSpawnUpdateDriver<'tr> { // this artifact? 
If not, cancel the old task (which // might still be trying to upload) and start a new one // with our current image. - if prev.status.borrow().hash - != plan.trampoline_phase_2.data.hash() + if prev.status.borrow().hash() + != Some(plan.trampoline_phase_2.data.hash()) { // It does _not_ match - we have a new plan with a // different trampoline image. If the old task is @@ -1147,19 +1155,38 @@ impl UpdateDriver { // We expect this loop to run just once, but iterate just in // case the image ID doesn't get populated the first time. loop { + match &*upload_trampoline_phase_2_to_mgs.borrow_and_update() + { + UploadTrampolinePhase2ToMgsStatus::Running { .. } => { + // fall through to `.changed()` below + }, + UploadTrampolinePhase2ToMgsStatus::Done { + uploaded_image_id, + .. + } => { + return StepSuccess::new( + uploaded_image_id.clone(), + ).into(); + } + UploadTrampolinePhase2ToMgsStatus::Failed(error) => { + let error = Arc::clone(error); + return Err(UpdateTerminalError::TrampolinePhase2UploadFailed { + error, + }); + } + } + + // `upload_trampoline_phase_2_to_mgs` holds onto the sending + // half of this channel until all receivers are gone, so the + // only way we can fail to receive here is if that task + // panicked (which would abort our process) or was cancelled + // (because a new TUF repo has been uploaded), in which case + // we should fail the current update. upload_trampoline_phase_2_to_mgs.changed().await.map_err( |_recv_err| { - UpdateTerminalError::TrampolinePhase2UploadFailed + UpdateTerminalError::TrampolinePhase2UploadCancelled } )?; - - if let Some(image_id) = upload_trampoline_phase_2_to_mgs - .borrow() - .uploaded_image_id - .as_ref() - { - return StepSuccess::new(image_id.clone()).into(); - } } }, ).register(); @@ -2149,59 +2176,68 @@ async fn upload_trampoline_phase_2_to_mgs( status: watch::Sender, log: Logger, ) { - let data = artifact.data; - let hash = data.hash(); - let upload_task = move || { - let mgs_client = mgs_client.clone(); - let data = data.clone(); - - async move { - let image_stream = data.reader_stream().await.map_err(|e| { - // TODO-correctness If we get an I/O error opening the file - // associated with `data`, is it actually a transient error? If - // we change this to `permanent` we'll have to do some different - // error handling below and at our call site to retry. We - // _shouldn't_ get errors from `reader_stream()` in general, so - // it's probably okay either way? - backoff::BackoffError::transient(format!("{e:#}")) - })?; - mgs_client - .recovery_host_phase2_upload(reqwest::Body::wrap_stream( - image_stream, - )) - .await - .map_err(|e| backoff::BackoffError::transient(e.to_string())) - } - }; + // We make at most 3 attempts to upload the trampoline to our local MGS, + // sleeping briefly between attempts if we fail. 
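+ // (Bounding the attempts lets us surface a terminal `Failed` status to
+ // our receivers rather than retrying forever and hanging the update.)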
+ const MAX_ATTEMPTS: usize = 3; + const SLEEP_BETWEEN_ATTEMPTS: Duration = Duration::from_secs(1); + + let mut attempt = 1; + let final_status = loop { + let image_stream = match artifact.data.reader_stream().await { + Ok(stream) => stream, + Err(err) => { + error!( + log, "failed to read trampoline phase 2"; + "err" => #%err, + ); + break UploadTrampolinePhase2ToMgsStatus::Failed(Arc::new( + err.context("failed to read trampoline phase 2"), + )); + } + }; - let log_failure = move |err, delay| { - warn!( - log, - "failed to upload trampoline phase 2 to MGS, will retry in {:?}", - delay; - "err" => %err, - ); + match mgs_client + .recovery_host_phase2_upload(reqwest::Body::wrap_stream( + image_stream, + )) + .await + { + Ok(response) => { + break UploadTrampolinePhase2ToMgsStatus::Done { + hash: artifact.data.hash(), + uploaded_image_id: response.into_inner(), + }; + } + Err(err) => { + if attempt < MAX_ATTEMPTS { + error!( + log, "failed to upload trampoline phase 2 to MGS; \ + will retry after {SLEEP_BETWEEN_ATTEMPTS:?}"; + "attempt" => attempt, + "err" => %DisplayErrorChain::new(&err), + ); + tokio::time::sleep(SLEEP_BETWEEN_ATTEMPTS).await; + attempt += 1; + continue; + } else { + error!( + log, "failed to upload trampoline phase 2 to MGS; \ + giving up"; + "attempt" => attempt, + "err" => %DisplayErrorChain::new(&err), + ); + break UploadTrampolinePhase2ToMgsStatus::Failed(Arc::new( + anyhow::Error::new(err) + .context("failed to upload trampoline phase 2"), + )); + } + } + } }; - // retry_policy_internal_service_aggressive() retries forever, so we can - // unwrap this call to retry_notify - let uploaded_image_id = backoff::retry_notify( - backoff::retry_policy_internal_service_aggressive(), - upload_task, - log_failure, - ) - .await - .unwrap() - .into_inner(); - - // Notify all receivers that we've uploaded the image. - _ = status.send(UploadTrampolinePhase2ToMgsStatus { - hash, - uploaded_image_id: Some(uploaded_image_id), - }); - - // Wait for all receivers to be gone before we exit, so they don't get recv - // errors unless we're cancelled. + // Send our final status, then wait for all receivers to be gone before we + // exit, so they don't get recv errors unless we're cancelled. + status.send_replace(final_status); status.closed().await; } diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index b65833a74b..52bf1d1283 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -177,15 +177,15 @@ async fn test_updates() { StepEventKind::ExecutionFailed { failed_step, .. } => { // TODO: obviously we shouldn't stop here, get past more of the // update process in this test. - assert_eq!(failed_step.info.component, UpdateComponent::Rot); + assert_eq!(failed_step.info.component, UpdateComponent::Host); } other => { panic!("unexpected terminal event kind: {other:?}"); } } - // Try starting the update again -- this should fail because we require that update state is - // cleared before starting a new one. + // Try starting the update again -- this should fail because we require that + // update state is cleared before starting a new one. { let error = wicketd_testctx .wicketd_client @@ -197,8 +197,8 @@ async fn test_updates() { ); let error_str = error.to_string(); assert!( - // Errors lose type information across the OpenAPI boundary, so sadly we have to match on - // the error string. 
+ // Errors lose type information across the OpenAPI boundary, so + // sadly we have to match on the error string. error_str.contains("existing update data found"), "unexpected error: {error_str}" ); From 26a8db3cd8b850c3bbf221b1d1d129148a788e08 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 21 Nov 2023 15:54:15 -0500 Subject: [PATCH 009/186] Various volume management fixes (#4410) This commit bundles up a few fixes related to volume management: - `find_deleted_volume_regions` now returns the `Option` that resulted from the `left_join` in the query for freed regions. This is then consulted to see if sending a DELETE for that region is safe: if the `Option` is `Some`, then the region snapshot has not been deleted yet, and sending a region DELETE will surely result in a `must delete snapshots first` error from the corresponding Crucible agent. - Fix a few typos in nexus/src/app/sagas/common_storage.rs - Nexus now waits for the Agent's `zfs destroy` of a snapshot to take place. Otherwise if illumos doesn't immediately remove the snapshot it may be returned by a subsequent `zfs list` later. - Either `decrease_crucible_resource_count_and_soft_delete_volume` or `volume_hard_delete` should be called when unwinding a saga, calling both is not required. - In the snapshot deletion saga, use `append_parallel` for the two volume delete sub sagas: in order to _not_ orphan Crucible resources, it's important that both volumes be soft deleted, and that failing to delete one volume's Crucible resources does not cause the other to _not_ be soft deleted. - Also fix a very confusing typo when building the destination volume delete sub saga. --- nexus/db-queries/src/db/datastore/volume.rs | 3 +- nexus/src/app/sagas/common_storage.rs | 218 ++++++++++++++++---- nexus/src/app/sagas/snapshot_create.rs | 23 ++- nexus/src/app/sagas/snapshot_delete.rs | 35 +++- nexus/src/app/sagas/volume_delete.rs | 138 ++++++++++--- 5 files changed, 319 insertions(+), 98 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index 1e64d784f7..5f126050ae 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -457,7 +457,7 @@ impl DataStore { /// snapshots. pub async fn find_deleted_volume_regions( &self, - ) -> ListResultVec<(Dataset, Region, Volume)> { + ) -> ListResultVec<(Dataset, Region, Option, Volume)> { use db::schema::dataset::dsl as dataset_dsl; use db::schema::region::dsl as region_dsl; use db::schema::region_snapshot::dsl; @@ -494,6 +494,7 @@ impl DataStore { .select(( Dataset::as_select(), Region::as_select(), + Option::::as_select(), Volume::as_select(), )) .load_async(&*self.pool_connection_unauthorized().await?) diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index a57afb215d..a7350d91fd 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -73,7 +73,9 @@ pub(crate) async fn ensure_region_in_dataset( let log_create_failure = |_, delay| { warn!( log, - "Region requested, not yet created. Retrying in {:?}", delay + "Region requested, not yet created. 
Retrying in {:?}", + delay; + "region" => %region.id(), ); }; @@ -157,7 +159,12 @@ pub(super) async fn delete_crucible_region( .await; if let Err(e) = result { - error!(log, "delete_crucible_region: region_get saw {:?}", e); + error!( + log, + "delete_crucible_region: region_get saw {:?}", + e; + "region_id" => %region_id, + ); match e { crucible_agent_client::Error::ErrorResponse(rv) => { match rv.status() { @@ -191,7 +198,12 @@ pub(super) async fn delete_crucible_region( }) .await .map_err(|e| { - error!(log, "delete_crucible_region: region_delete saw {:?}", e); + error!( + log, + "delete_crucible_region: region_delete saw {:?}", + e; + "region_id" => %region_id, + ); match e { crucible_agent_client::Error::ErrorResponse(rv) => { match rv.status() { @@ -226,7 +238,12 @@ pub(super) async fn delete_crucible_region( }) .await .map_err(|e| { - error!(log, "delete_crucible_region: region_get saw {:?}", e); + error!( + log, + "delete_crucible_region: region_get saw {:?}", + e; + "region_id" => %region_id, + ); match e { crucible_agent_client::Error::ErrorResponse(rv) => { @@ -250,29 +267,33 @@ pub(super) async fn delete_crucible_region( })?; match region.state { - RegionState::Tombstoned => { - Err(BackoffError::transient(WaitError::Transient(anyhow!( - "region {} not deleted yet", - region_id.to_string(), - )))) - } + RegionState::Tombstoned => Err(BackoffError::transient( + WaitError::Transient(anyhow!("region not deleted yet")), + )), RegionState::Destroyed => { - info!(log, "region {} deleted", region_id.to_string(),); + info!( + log, + "region deleted"; + "region_id" => %region_id, + ); Ok(()) } - _ => { - Err(BackoffError::transient(WaitError::Transient(anyhow!( - "region {} unexpected state", - region_id.to_string(), - )))) - } + _ => Err(BackoffError::transient(WaitError::Transient( + anyhow!("region unexpected state {:?}", region.state), + ))), } }, |e: WaitError, delay| { - info!(log, "{:?}, trying again in {:?}", e, delay,); + info!( + log, + "{:?}, trying again in {:?}", + e, + delay; + "region_id" => %region_id, + ); }, ) .await @@ -338,8 +359,10 @@ pub(super) async fn delete_crucible_running_snapshot( .map_err(|e| { error!( log, - "delete_crucible_snapshot: region_delete_running_snapshot saw {:?}", - e + "delete_crucible_running_snapshot: region_delete_running_snapshot saw {:?}", + e; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, ); match e { crucible_agent_client::Error::ErrorResponse(rv) => { @@ -377,7 +400,14 @@ pub(super) async fn delete_crucible_running_snapshot( }) .await .map_err(|e| { - error!(log, "delete_crucible_snapshot: region_get_snapshots saw {:?}", e); + error!( + log, + "delete_crucible_running_snapshot: region_get_snapshots saw {:?}", + e; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, + ); + match e { crucible_agent_client::Error::ErrorResponse(rv) => { match rv.status() { @@ -409,19 +439,17 @@ pub(super) async fn delete_crucible_running_snapshot( Some(running_snapshot) => { info!( log, - "region {} snapshot {} running_snapshot is Some, state is {}", - region_id.to_string(), - snapshot_id.to_string(), - running_snapshot.state.to_string(), + "running_snapshot is Some, state is {}", + running_snapshot.state.to_string(); + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, ); match running_snapshot.state { RegionState::Tombstoned => { Err(BackoffError::transient( WaitError::Transient(anyhow!( - "region {} snapshot {} running_snapshot not deleted yet", - region_id.to_string(), - snapshot_id.to_string(), + "running_snapshot 
tombstoned, not deleted yet", ) ))) } @@ -429,9 +457,7 @@ pub(super) async fn delete_crucible_running_snapshot( RegionState::Destroyed => { info!( log, - "region {} snapshot {} running_snapshot deleted", - region_id.to_string(), - snapshot_id.to_string(), + "running_snapshot deleted", ); Ok(()) @@ -440,9 +466,7 @@ pub(super) async fn delete_crucible_running_snapshot( _ => { Err(BackoffError::transient( WaitError::Transient(anyhow!( - "region {} snapshot {} running_snapshot unexpected state", - region_id.to_string(), - snapshot_id.to_string(), + "running_snapshot unexpected state", ) ))) } @@ -453,9 +477,9 @@ pub(super) async fn delete_crucible_running_snapshot( // deleted? info!( log, - "region {} snapshot {} running_snapshot is None", - region_id.to_string(), - snapshot_id.to_string(), + "running_snapshot is None"; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, ); // break here - it's possible that the running snapshot @@ -469,7 +493,9 @@ pub(super) async fn delete_crucible_running_snapshot( log, "{:?}, trying again in {:?}", e, - delay, + delay; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, ); } ) @@ -494,7 +520,14 @@ pub(super) async fn delete_crucible_snapshot( region_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - // delete snapshot - this endpoint is synchronous, it is not only a request + // Unlike other Crucible agent endpoints, this one is synchronous in that it + // is not only a request to the Crucible agent: `zfs destroy` is performed + // right away. However this is still a request to illumos that may not take + // effect right away. Wait until the snapshot no longer appears in the list + // of region snapshots, meaning it was not returned from `zfs list`. + + info!(log, "deleting region {region_id} snapshot {snapshot_id}"); + retry_until_known_result(log, || async { client .region_delete_snapshot( @@ -507,7 +540,10 @@ pub(super) async fn delete_crucible_snapshot( .map_err(|e| { error!( log, - "delete_crucible_snapshot: region_delete_snapshot saw {:?}", e + "delete_crucible_snapshot: region_delete_snapshot saw {:?}", + e; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, ); match e { crucible_agent_client::Error::ErrorResponse(rv) => { @@ -524,7 +560,101 @@ pub(super) async fn delete_crucible_snapshot( } })?; - Ok(()) + #[derive(Debug, thiserror::Error)] + enum WaitError { + #[error("Transient error: {0}")] + Transient(#[from] anyhow::Error), + + #[error("Permanent error: {0}")] + Permanent(#[from] Error), + } + + backoff::retry_notify( + backoff::retry_policy_internal_service_aggressive(), + || async { + let response = retry_until_known_result(log, || async { + client + .region_get_snapshots(&RegionId(region_id.to_string())) + .await + }) + .await + .map_err(|e| { + error!( + log, + "delete_crucible_snapshot: region_get_snapshots saw {:?}", + e; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, + ); + match e { + crucible_agent_client::Error::ErrorResponse(rv) => { + match rv.status() { + status if status.is_client_error() => { + BackoffError::Permanent(WaitError::Permanent( + Error::invalid_request(&rv.message), + )) + } + _ => BackoffError::Permanent(WaitError::Permanent( + Error::internal_error(&rv.message), + )), + } + } + _ => BackoffError::Permanent(WaitError::Permanent( + Error::internal_error( + "unexpected failure during `region_get_snapshots`", + ), + )), + } + })?; + + if response + .snapshots + .iter() + .any(|x| x.name == snapshot_id.to_string()) + { + info!( + log, + "snapshot still exists, waiting"; 
+ "region_id" => %region_id, + "snapshot_id" => %snapshot_id, + ); + + Err(BackoffError::transient(WaitError::Transient(anyhow!( + "snapshot not deleted yet", + )))) + } else { + info!( + log, + "snapshot deleted"; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, + ); + + Ok(()) + } + }, + |e: WaitError, delay| { + info!( + log, + "{:?}, trying again in {:?}", + e, + delay; + "region_id" => %region_id, + "snapshot_id" => %snapshot_id, + ); + }, + ) + .await + .map_err(|e| match e { + WaitError::Transient(e) => { + // The backoff crate can be configured with a maximum elapsed time + // before giving up, which means that Transient could be returned + // here. Our current policies do **not** set this though. + Error::internal_error(&e.to_string()) + } + + WaitError::Permanent(e) => e, + }) } // Given a list of datasets and region snapshots, send DELETE calls to the @@ -645,10 +775,8 @@ pub(crate) async fn call_pantry_attach_for_disk( info!( log, - "sending attach for disk {} volume {} to endpoint {}", - disk_id, + "sending attach for disk {disk_id} volume {} to endpoint {endpoint}", disk.volume_id, - endpoint, ); let volume_construction_request: crucible_pantry_client::types::VolumeConstructionRequest = @@ -684,7 +812,7 @@ pub(crate) async fn call_pantry_detach_for_disk( ) -> Result<(), ActionError> { let endpoint = format!("http://{}", pantry_address); - info!(log, "sending detach for disk {} to endpoint {}", disk_id, endpoint,); + info!(log, "sending detach for disk {disk_id} to endpoint {endpoint}"); let client = crucible_pantry_client::Client::new(&endpoint); diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 5a686b2f3d..3b4dfc0043 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -496,17 +496,19 @@ async fn ssc_create_destination_volume_record( async fn ssc_create_destination_volume_record_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let destination_volume_id = sagactx.lookup::("destination_volume_id")?; - osagactx - .datastore() - .decrease_crucible_resource_count_and_soft_delete_volume( - destination_volume_id, - ) - .await?; + // This saga contains what is necessary to clean up the destination volume + // resources. It's safe here to perform a volume hard delete without + // decreasing the crucible resource count because the destination volume is + // guaranteed to never have read only resources that require that + // accounting. + + info!(log, "hard deleting volume {}", destination_volume_id,); osagactx.datastore().volume_hard_delete(destination_volume_id).await?; @@ -1396,17 +1398,22 @@ async fn ssc_create_volume_record_undo( let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; + // `volume_create` will increase the resource count for read only resources + // in a volume, which there are guaranteed to be for snapshot volumes. + // decreasing crucible resources is necessary as an undo step. Do not call + // `volume_hard_delete` here: soft deleting volumes is necessary for + // `find_deleted_volume_regions` to work. 
+ info!(
 log,
 "calling decrease crucible resource count for volume {}",
 volume_id
 );
+
 osagactx
 .datastore()
 .decrease_crucible_resource_count_and_soft_delete_volume(volume_id)
 .await?;

- osagactx.datastore().volume_hard_delete(volume_id).await?;
-
 Ok(())
 }

diff --git a/nexus/src/app/sagas/snapshot_delete.rs b/nexus/src/app/sagas/snapshot_delete.rs
index 0589b1ea03..75fc16754d 100644
--- a/nexus/src/app/sagas/snapshot_delete.rs
+++ b/nexus/src/app/sagas/snapshot_delete.rs
@@ -26,6 +26,9 @@ declare_saga_actions! {
 SPACE_ACCOUNT -> "no_result2" {
 + ssd_account_space
 }
+ NOOP -> "no_result3" {
+ + ssd_noop
+ }
 }

 #[derive(Debug)]
@@ -71,7 +74,7 @@ impl NexusSaga for SagaSnapshotDelete {
 DELETE_VOLUME_DESTINATION_PARAMS,
 serde_json::to_value(&volume_delete_params).map_err(|e| {
 super::SagaInitError::SerializeError(
- String::from("volume_id"),
+ String::from("destination_volume_id"),
 e,
 )
 })?,
@@ -83,16 +86,21 @@ impl NexusSaga for SagaSnapshotDelete {
 ));
 sagas::volume_delete::create_dag(subsaga_builder)
 };
- builder.append(steno::Node::subsaga(
- "delete_volume",
- make_volume_delete_dag()?,
- DELETE_VOLUME_PARAMS,
- ));
- builder.append(steno::Node::subsaga(
- "delete_destination_volume",
- make_volume_delete_dag()?,
- DELETE_VOLUME_DESTINATION_PARAMS,
- ));
+
+ builder.append_parallel(vec![
+ steno::Node::subsaga(
+ "delete_volume",
+ make_volume_delete_dag()?,
+ DELETE_VOLUME_PARAMS,
+ ),
+ steno::Node::subsaga(
+ "delete_destination_volume",
+ make_volume_delete_dag()?,
+ DELETE_VOLUME_DESTINATION_PARAMS,
+ ),
+ ]);
+
+ builder.append(noop_action());

 Ok(builder.build()?)
 }
@@ -148,3 +156,8 @@ async fn ssd_account_space(
 .map_err(ActionError::action_failed)?;
 Ok(())
 }
+
+// Sagas must end in one node, not parallel
+async fn ssd_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> {
+ Ok(())
+}
diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs
index 43530e913c..22425a0b99 100644
--- a/nexus/src/app/sagas/volume_delete.rs
+++ b/nexus/src/app/sagas/volume_delete.rs
@@ -332,6 +332,74 @@ async fn svd_delete_crucible_snapshot_records(
 /// be a different volume id (i.e. for a previously deleted disk) than the one
 /// in this saga's params struct.
 ///
+/// It's insufficient to rely on the CrucibleResources struct returned by
+/// svd_decrease_crucible_resource_count to decide what to clean up. Imagine a
+/// disk that is composed of three regions (a subset of
+/// [`VolumeConstructionRequest`] is shown here):
+///
+/// {
+/// "type": "volume",
+/// "id": "6b353c87-afac-4ee2-b71a-6fe35fcf9e46",
+/// "sub_volumes": [
+/// {
+/// "type": "region",
+/// "opts": {
+/// "targets": [
+/// "[fd00:1122:3344:101::5]:1000",
+/// "[fd00:1122:3344:102::9]:1000",
+/// "[fd00:1122:3344:103::2]:1000"
+/// ],
+/// "read_only": false
+/// }
+/// }
+/// ],
+/// "read_only_parent": null,
+/// }
+///
+/// Taking a snapshot of this will produce the following volume:
+///
+/// {
+/// "type": "volume",
+/// "id": "1ef7282e-a3fb-4222-85a8-b16d3fbfd738", <-- new UUID
+/// "sub_volumes": [
+/// {
+/// "type": "region",
+/// "opts": {
+/// "targets": [
+/// "[fd00:1122:3344:101::5]:1001", <-- port changed
+/// "[fd00:1122:3344:102::9]:1001", <-- port changed
+/// "[fd00:1122:3344:103::2]:1001" <-- port changed
+/// ],
+/// "read_only": true <-- read_only now true
+/// }
+/// }
+/// ],
+/// "read_only_parent": null,
+/// }
+///
+/// The snapshot targets will use the same IP but different port: snapshots are
+/// initially located on the same filesystem as their region.
+/// +/// The disk's volume has no read only resources, while the snapshot's volume +/// does. The disk volume's targets are all regions (backed by downstairs that +/// are read/write) while the snapshot volume's targets are all snapshots +/// (backed by volumes that are read-only). The two volumes are linked in the +/// sense that the snapshots from the second are contained *within* the regions +/// of the first, reflecting the resource nesting from ZFS. This is also +/// reflected in the REST endpoint that the Crucible agent uses: +/// +/// /crucible/0/regions/{id}/snapshots/{name} +/// +/// If the disk is then deleted, the volume delete saga will run for the first +/// volume shown here. The CrucibleResources struct returned as part of +/// [`svd_decrease_crucible_resource_count`] will contain *nothing* to clean up: +/// the regions contain snapshots that are part of other volumes and cannot be +/// deleted, and the disk's volume doesn't reference any read-only resources. +/// +/// This is expected and normal: regions are "leaked" all the time due to +/// snapshots preventing their deletion. This part of the saga detects when +/// those regions can be cleaned up. +/// /// Note: each delete of a snapshot could trigger another delete of a region, if /// that region's use has gone to zero. A snapshot delete will never trigger /// another snapshot delete. @@ -353,42 +421,46 @@ async fn svd_delete_freed_crucible_regions( }, )?; - // Send DELETE calls to the corresponding Crucible agents - delete_crucible_regions( - log, + for (dataset, region, region_snapshot, volume) in freed_datasets_regions_and_volumes - .iter() - .map(|(d, r, _)| (d.clone(), r.clone())) - .collect(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to delete_crucible_regions: {:?}", - e, - )) - })?; + { + if region_snapshot.is_some() { + // We cannot delete this region yet, the snapshot has not been + // deleted. This can occur when multiple volume delete sagas run + // concurrently: one will decrement the crucible resources (but + // hasn't made the appropriate DELETE calls to remove the running + // snapshots and snapshots yet), and the other will be here trying + // to delete the region. This race results in the crucible agent + // returning "must delete snapshots first" and causing saga unwinds. + // + // Another volume delete (probably the one racing with this one!) + // will pick up this region and remove it. 
+ continue; + } + + // Send DELETE calls to the corresponding Crucible agents + delete_crucible_regions(log, vec![(dataset.clone(), region.clone())]) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to delete_crucible_regions: {:?}", + e, + )) + })?; - // Remove region DB records - osagactx - .datastore() - .regions_hard_delete( - log, - freed_datasets_regions_and_volumes - .iter() - .map(|(_, r, _)| r.id()) - .collect(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to regions_hard_delete: {:?}", - e, - )) - })?; + // Remove region DB record + osagactx + .datastore() + .regions_hard_delete(log, vec![region.id()]) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to regions_hard_delete: {:?}", + e, + )) + })?; - // Remove volume DB records - for (_, _, volume) in &freed_datasets_regions_and_volumes { + // Remove volume DB record osagactx.datastore().volume_hard_delete(volume.id()).await.map_err( |e| { ActionError::action_failed(format!( From 2f92c02a0c9d1585ff1b3210565b0c9f0f25e9a4 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 21 Nov 2023 13:55:00 -0800 Subject: [PATCH 010/186] Reject bad combinations of `up*.sql` migrations (#4546) Fixes #4531. If I cherry-pick these changes onto `main` just prior to #4529 (i.e., when the `11.0.0` directory contained upsql files for both 10.0.0 and 11.0.0), the `dbinit_equals_sum_of_all_up` integration test fails as desired: ``` thread 'integration_tests::schema::dbinit_equals_sum_of_all_up' panicked at nexus/tests/integration_tests/schema.rs:133:58: called `Result::unwrap()` on an `Err` value: "invalid `up*.sql` combination: /data/github/omicron/nexus/../schema/crdb/11.0.0/up01.sql, /data/github/omicron/nexus/../schema/crdb/11.0.0/up1.sql" ``` --- Cargo.lock | 2 +- nexus/db-queries/Cargo.toml | 2 +- .../src/db/datastore/db_metadata.rs | 242 +++++++++++++++++- schema/crdb/README.adoc | 11 +- 4 files changed, 237 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b324f4919b..2e0663161d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4007,6 +4007,7 @@ dependencies = [ "base64 0.21.5", "bb8", "camino", + "camino-tempfile", "chrono", "cookie", "db-macros", @@ -4059,7 +4060,6 @@ dependencies = [ "steno", "strum", "subprocess", - "tempfile", "term", "thiserror", "tokio", diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index b1b8f3b28f..94e3a56abf 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -58,6 +58,7 @@ omicron-workspace-hack.workspace = true [dev-dependencies] assert_matches.workspace = true +camino-tempfile.workspace = true expectorate.workspace = true hyper-rustls.workspace = true gateway-client.workspace = true @@ -75,5 +76,4 @@ regex.workspace = true rustls.workspace = true strum.workspace = true subprocess.workspace = true -tempfile.workspace = true term.workspace = true diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 0ae61a7c38..39a70f7a1e 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -26,12 +26,14 @@ use std::str::FromStr; pub const EARLIEST_SUPPORTED_VERSION: &'static str = "1.0.0"; /// Describes a single file containing a schema change, as SQL. +#[derive(Debug)] pub struct SchemaUpgradeStep { pub path: Utf8PathBuf, pub sql: String, } /// Describes a sequence of files containing schema changes. 
+#[derive(Debug)] pub struct SchemaUpgrade { pub steps: Vec, } @@ -39,10 +41,18 @@ pub struct SchemaUpgrade { /// Reads a "version directory" and reads all SQL changes into /// a result Vec. /// -/// Any file that starts with "up" and ends with "sql" is considered -/// part of the migration, and fully read to a string. +/// Files that do not begin with "up" and end with ".sql" are ignored. The +/// collection of `up*.sql` files must fall into one of these two conventions: /// -/// These are sorted lexicographically. +/// * "up.sql" with no other files +/// * "up1.sql", "up2.sql", ..., beginning from 1, optionally with leading +/// zeroes (e.g., "up01.sql", "up02.sql", ...). There is no maximum value, but +/// there may not be any gaps (e.g., if "up2.sql" and "up4.sql" exist, so must +/// "up3.sql") and there must not be any repeats (e.g., if "up1.sql" exists, +/// "up01.sql" must not exist). +/// +/// Any violation of these two rules will result in an error. Collections of the +/// second form (`up1.sql`, ...) will be sorted numerically. pub async fn all_sql_for_version_migration>( path: P, ) -> Result { @@ -54,19 +64,83 @@ pub async fn all_sql_for_version_migration>( for entry in entries { let entry = entry.map_err(|err| format!("Invalid entry: {err}"))?; let pathbuf = entry.into_path(); - let is_up = pathbuf - .file_name() - .map(|name| name.starts_with("up")) - .unwrap_or(false); - let is_sql = matches!(pathbuf.extension(), Some("sql")); - if is_up && is_sql { - up_sqls.push(pathbuf); + + // Ensure filename ends with ".sql" + if pathbuf.extension() != Some("sql") { + continue; + } + + // Ensure filename begins with "up", and extract anything in between + // "up" and ".sql". + let Some(remaining_filename) = pathbuf + .file_stem() + .and_then(|file_stem| file_stem.strip_prefix("up")) + else { + continue; + }; + + // Ensure the remaining filename is either empty (i.e., the filename is + // exactly "up.sql") or parseable as an unsigned integer. We give + // "up.sql" the "up_number" 0 (checked in the loop below), and require + // any other number to be nonzero. + if remaining_filename.is_empty() { + up_sqls.push((0, pathbuf)); + } else { + let Ok(up_number) = remaining_filename.parse::() else { + return Err(format!( + "invalid filename (non-numeric `up*.sql`): {pathbuf}", + )); + }; + if up_number == 0 { + return Err(format!( + "invalid filename (`up*.sql` numbering must start at 1): \ + {pathbuf}", + )); + } + up_sqls.push((up_number, pathbuf)); } } up_sqls.sort(); + // Validate that we have a reasonable sequence of `up*.sql` numbers. + match up_sqls.as_slice() { + [] => return Err("no `up*.sql` files found".to_string()), + [(up_number, path)] => { + // For a single file, we allow either `up.sql` (keyed as + // up_number=0) or `up1.sql`; reject any higher number. + if *up_number > 1 { + return Err(format!( + "`up*.sql` numbering must start at 1: found first file \ + {path}" + )); + } + } + _ => { + for (i, (up_number, path)) in up_sqls.iter().enumerate() { + // We have 2 or more `up*.sql`; they should be numbered exactly + // 1..=up_sqls.len(). + if i as u64 + 1 != *up_number { + // We know we have at least two elements, so report an error + // referencing either the next item (if we're first) or the + // previous item (if we're not first). 
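+ // For example, given `up1.sql` and `up3.sql` we fail at
+ // i = 1 (expected up_number 2, found 3) and report the
+ // pair (up1.sql, up3.sql).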
+ let (path_a, path_b) = if i == 0 {
+ let (_, next_path) = &up_sqls[1];
+ (path, next_path)
+ } else {
+ let (_, prev_path) = &up_sqls[i - 1];
+ (prev_path, path)
+ };
+ return Err(format!(
+ "invalid `up*.sql` combination: {path_a}, {path_b}"
+ ));
+ }
+ }
+ }
+ }
+
+ // This collection of `up*.sql` files is valid; read them all, in order.
 let mut result = SchemaUpgrade { steps: vec![] };
- for path in up_sqls.into_iter() {
+ for (_, path) in up_sqls.into_iter() {
 let sql = tokio::fs::read_to_string(&path)
 .await
 .map_err(|e| format!("Cannot read {path}: {e}"))?;
@@ -403,11 +477,150 @@ impl DataStore {
 #[cfg(test)]
 mod test {
 use super::*;
+ use camino_tempfile::Utf8TempDir;
 use nexus_db_model::schema::SCHEMA_VERSION;
 use nexus_test_utils::db as test_db;
 use omicron_test_utils::dev;
 use std::sync::Arc;

+ // Confirm that `all_sql_for_version_migration` rejects `up*.sql` files
+ // where the `*` doesn't contain a positive integer.
+ #[tokio::test]
+ async fn all_sql_for_version_migration_rejects_invalid_up_sql_names() {
+ for (invalid_filename, error_prefix) in [
+ ("upA.sql", "invalid filename (non-numeric `up*.sql`)"),
+ ("up1a.sql", "invalid filename (non-numeric `up*.sql`)"),
+ ("upaaa1.sql", "invalid filename (non-numeric `up*.sql`)"),
+ ("up-3.sql", "invalid filename (non-numeric `up*.sql`)"),
+ (
+ "up0.sql",
+ "invalid filename (`up*.sql` numbering must start at 1)",
+ ),
+ (
+ "up00.sql",
+ "invalid filename (`up*.sql` numbering must start at 1)",
+ ),
+ (
+ "up000.sql",
+ "invalid filename (`up*.sql` numbering must start at 1)",
+ ),
+ ] {
+ let tempdir = Utf8TempDir::new().unwrap();
+ let filename = tempdir.path().join(invalid_filename);
+ _ = tokio::fs::File::create(&filename).await.unwrap();
+
+ match all_sql_for_version_migration(tempdir.path()).await {
+ Ok(upgrade) => {
+ panic!(
+ "unexpected success on {invalid_filename} \
+ (produced {upgrade:?})"
+ );
+ }
+ Err(message) => {
+ assert_eq!(message, format!("{error_prefix}: {filename}"));
+ }
+ }
+ }
+ }
+
+ // Confirm that `all_sql_for_version_migration` rejects a directory with no
+ // appropriately-named files.
+ #[tokio::test]
+ async fn all_sql_for_version_migration_rejects_no_up_sql_files() {
+ for filenames in [
+ &[] as &[&str],
+ &["README.md"],
+ &["foo.sql", "bar.sql"],
+ &["up1sql", "up2sql"],
+ ] {
+ let tempdir = Utf8TempDir::new().unwrap();
+ for filename in filenames {
+ _ = tokio::fs::File::create(tempdir.path().join(filename))
+ .await
+ .unwrap();
+ }
+
+ match all_sql_for_version_migration(tempdir.path()).await {
+ Ok(upgrade) => {
+ panic!(
+ "unexpected success on {filenames:?} \
+ (produced {upgrade:?})"
+ );
+ }
+ Err(message) => {
+ assert_eq!(message, "no `up*.sql` files found");
+ }
+ }
+ }
+ }
+
+ // Confirm that `all_sql_for_version_migration` rejects collections of
+ // `up*.sql` files with individually-valid names but that do not pass the
+ // rules of the entire collection.
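+ // (For example: mixing `up.sql` with numbered files, repeating a
+ // number via leading zeroes like `up1.sql` and `up01.sql`, or leaving
+ // a gap such as `up1.sql` followed by `up3.sql`.)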
+ #[tokio::test] + async fn all_sql_for_version_migration_rejects_invalid_up_sql_collections() + { + for invalid_filenames in [ + &["up.sql", "up1.sql"] as &[&str], + &["up1.sql", "up01.sql"], + &["up1.sql", "up3.sql"], + &["up1.sql", "up2.sql", "up3.sql", "up02.sql"], + ] { + let tempdir = Utf8TempDir::new().unwrap(); + for filename in invalid_filenames { + _ = tokio::fs::File::create(tempdir.path().join(filename)) + .await + .unwrap(); + } + + match all_sql_for_version_migration(tempdir.path()).await { + Ok(upgrade) => { + panic!( + "unexpected success on {invalid_filenames:?} \ + (produced {upgrade:?})" + ); + } + Err(message) => { + assert!( + message.starts_with("invalid `up*.sql` combination: "), + "message did not start with expected prefix: \ + {message:?}" + ); + } + } + } + } + + // Confirm that `all_sql_for_version_migration` accepts legal collections of + // `up*.sql` filenames. + #[tokio::test] + async fn all_sql_for_version_migration_allows_valid_up_sql_collections() { + for filenames in [ + &["up.sql"] as &[&str], + &["up1.sql", "up2.sql"], + &[ + "up01.sql", "up02.sql", "up03.sql", "up04.sql", "up05.sql", + "up06.sql", "up07.sql", "up08.sql", "up09.sql", "up10.sql", + "up11.sql", + ], + &["up00001.sql", "up00002.sql", "up00003.sql"], + ] { + let tempdir = Utf8TempDir::new().unwrap(); + for filename in filenames { + _ = tokio::fs::File::create(tempdir.path().join(filename)) + .await + .unwrap(); + } + + match all_sql_for_version_migration(tempdir.path()).await { + Ok(_) => (), + Err(message) => { + panic!("unexpected failure on {filenames:?}: {message:?}"); + } + } + } + } + // Confirms that calling the internal "ensure_schema" function can succeed // when the database is already at that version. #[tokio::test] @@ -444,7 +657,7 @@ mod test { let conn = pool.pool().get().await.unwrap(); // Mimic the layout of "schema/crdb". - let config_dir = tempfile::TempDir::new().unwrap(); + let config_dir = Utf8TempDir::new().unwrap(); // Helper to create the version directory and "up.sql". let add_upgrade = |version: SemverVersion, sql: String| { @@ -499,8 +712,9 @@ .await; // Show that the datastores can be created concurrently. - let config = - SchemaConfig { schema_dir: config_dir.path().to_path_buf() }; + let config = SchemaConfig { + schema_dir: config_dir.path().to_path_buf().into_std_path_buf(), + }; let _ = futures::future::join_all((0..10).map(|_| { let log = log.clone(); let pool = pool.clone(); diff --git a/schema/crdb/README.adoc b/schema/crdb/README.adoc index fba36ed73b..5b9c2f6a10 100644 --- a/schema/crdb/README.adoc +++ b/schema/crdb/README.adoc @@ -14,9 +14,11 @@ We use the following conventions: appear in each file. More on this below. ** If there's only one statement required, we put it into `up.sql`. ** If more than one change is needed, any number of files starting with `up` - and ending with `.sql` may be used. These files will be sorted in - lexicographic order before being executed. Each will be executed in a - separate transaction. + and ending with `.sql` may be used. These files must follow a + numerically-increasing pattern starting with 1 (leading zeroes are allowed, + so `up1.sql`, `up2.sql`, ..., or `up01.sql`, `up02.sql`, etc.), and they will + be sorted numerically by these values. Each will be executed in a separate + transaction. ** CockroachDB documentation recommends the following: "Execute schema changes ... in an explicit transaction consisting of the single schema change statement.".
Practically this means: If you want to change multiple @@ -65,7 +67,8 @@ Process: * If only one SQL statement is necessary to get from `OLD_VERSION` to `NEW_VERSION`, put that statement into `schema/crdb/NEW_VERSION/up.sql`. If multiple statements are required, put each one into a separate file, naming - these `schema/crdb/NEW_VERSION/upN.sql` for as many `N` as you need. + these `schema/crdb/NEW_VERSION/upN.sql` for as many `N` as you need, starting + with `N=1`. ** Each file should contain _either_ one schema-modifying statement _or_ some number of data-modifying statements. You can combine multiple data-modifying statements. But you should not mix schema-modifying statements and From 5c6ad0836c41678010ce54b88cf80874382ad1bd Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 21 Nov 2023 13:57:21 -0800 Subject: [PATCH 011/186] Nexus inventory: Add collection of RoT CMPA and CFPA pages (#4496) The RoT can report four different 512-byte pages (CMPA, and CFPA active/inactive/scratch). Given multiple RoT artifacts that are viable (match the right board, etc.) but are signed with different keys, these pages are required to identify which archive was signed with a key that the RoT will accept. This PR adds collection of these pages to the inventory system added in #4291. The implementation here is fairly bulky but very mechanical, and is implemented almost identically to the way we collect cabooses: there's an `rot_page_which` to identify which of the four kinds of page it is, and a table for storing the relatively small number of raw page data values. Most of the changes in this PR resulted from "find where we're doing something for cabooses, then do the analogous thing for RoT pages". There are a couple minor quibbles in the unit tests that I'll point out by leaving comments below. The RoT pages now show up when viewing a collection through omdb (note that the quite long base64 string is truncated; there's a command line flag to override the truncation and show the full string): ```console $ omdb db inventory collections show e2f84867-010d-4ac3-bbf3-bc1e865da16b > x.txt note: database URL not specified. Will search DNS. note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:43301/omicron?sslmode=disable note: database schema version matches expected (11.0.0) collection: e2f84867-010d-4ac3-bbf3-bc1e865da16b collector: e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c (likely a Nexus instance) started: 2023-11-14T18:51:54.900Z done: 2023-11-14T18:51:54.942Z errors: 0 Sled SimGimlet00 part number: FAKE_SIM_GIMLET power: A2 revision: 0 MGS slot: Sled 0 (cubby 0) found at: 2023-11-14 18:51:54.924602 UTC from http://[::1]:42341 cabooses: SLOT BOARD NAME VERSION GIT_COMMIT SpSlot0 SimGimletSp SimGimlet 0.0.1 ffffffff SpSlot1 SimGimletSp SimGimlet 0.0.1 ffffffff RotSlotA SimGimletRot SimGimlet 0.0.1 eeeeeeee RotSlotB SimGimletRot SimGimlet 0.0.1 eeeeeeee RoT pages: SLOT DATA_BASE64 Cmpa Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAA... CfpaActive Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAA... CfpaInactive Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAA... CfpaScratch Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAA...
RoT: active slot: slot A RoT: persistent boot preference: slot A RoT: pending persistent boot preference: - RoT: transient boot preference: - RoT: slot A SHA3-256: - RoT: slot B SHA3-256: - Sled SimGimlet01 part number: FAKE_SIM_GIMLET power: A2 revision: 0 MGS slot: Sled 1 (cubby 1) found at: 2023-11-14 18:51:54.935038 UTC from http://[::1]:42341 cabooses: SLOT BOARD NAME VERSION GIT_COMMIT SpSlot0 SimGimletSp SimGimlet 0.0.1 ffffffff SpSlot1 SimGimletSp SimGimlet 0.0.1 ffffffff RotSlotA SimGimletRot SimGimlet 0.0.1 eeeeeeee RotSlotB SimGimletRot SimGimlet 0.0.1 eeeeeeee RoT pages: SLOT DATA_BASE64 Cmpa Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAA... CfpaActive Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAA... CfpaInactive Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAA... CfpaScratch Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAA... RoT: active slot: slot A RoT: persistent boot preference: slot A RoT: pending persistent boot preference: - RoT: transient boot preference: - RoT: slot A SHA3-256: - RoT: slot B SHA3-256: - Switch SimSidecar0 part number: FAKE_SIM_SIDECAR power: A2 revision: 0 MGS slot: Switch 0 found at: 2023-11-14 18:51:54.904 UTC from http://[::1]:42341 cabooses: SLOT BOARD NAME VERSION GIT_COMMIT SpSlot0 SimSidecarSp SimSidecar 0.0.1 ffffffff SpSlot1 SimSidecarSp SimSidecar 0.0.1 ffffffff RotSlotA SimSidecarRot SimSidecar 0.0.1 eeeeeeee RotSlotB SimSidecarRot SimSidecar 0.0.1 eeeeeeee RoT pages: SLOT DATA_BASE64 Cmpa c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAA... CfpaActive c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAA... CfpaInactive c2lkZWNhci1jZnBhLWluYWN0aXZlAAAA... CfpaScratch c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAA... RoT: active slot: slot A RoT: persistent boot preference: slot A RoT: pending persistent boot preference: - RoT: transient boot preference: - RoT: slot A SHA3-256: - RoT: slot B SHA3-256: - Switch SimSidecar1 part number: FAKE_SIM_SIDECAR power: A2 revision: 0 MGS slot: Switch 1 found at: 2023-11-14 18:51:54.915680 UTC from http://[::1]:42341 cabooses: SLOT BOARD NAME VERSION GIT_COMMIT SpSlot0 SimSidecarSp SimSidecar 0.0.1 ffffffff SpSlot1 SimSidecarSp SimSidecar 0.0.1 ffffffff RotSlotA SimSidecarRot SimSidecar 0.0.1 eeeeeeee RotSlotB SimSidecarRot SimSidecar 0.0.1 eeeeeeee RoT pages: SLOT DATA_BASE64 Cmpa c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAA... CfpaActive c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAA... CfpaInactive c2lkZWNhci1jZnBhLWluYWN0aXZlAAAA... CfpaScratch c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAA... RoT: active slot: slot A RoT: persistent boot preference: slot A RoT: pending persistent boot preference: - RoT: transient boot preference: - RoT: slot A SHA3-256: - RoT: slot B SHA3-256: - ``` There's also a new `omdb` subcommand to report the RoT pages (which does not truncate, but if we think it should that'd be easy to change): ```console $ omdb db inventory rot-pages note: database URL not specified. Will search DNS. 
note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:43301/omicron?sslmode=disable note: database schema version matches expected (11.0.0) ID DATA_BASE64 099ba572-a978-4592-ae7a-452629377904 c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 0e9dc5b0-b190-43da-acb6-84450fdfdb94 c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 80923bac-fbcc-46e0-b861-9dba906c14f7 Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 98cc4225-a791-4092-99c6-81e27e8d8ffa c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= a32eaf95-a20e-4570-8860-e0fb584a2ff1 
c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= c941810a-1c6a-4dda-9c71-41a0caf62ace Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= e96042d0-ae8a-435c-9118-1b71e8a9a651 Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= fdc27064-4338-4cbe-bfe5-622b11a9afbc Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= --- Cargo.lock | 2 + dev-tools/omdb/Cargo.toml | 1 + dev-tools/omdb/src/bin/omdb/db.rs | 129 ++++++- nexus/db-model/src/inventory.rs | 101 ++++- nexus/db-model/src/schema.rs | 26 +- .../db-queries/src/db/datastore/inventory.rs | 360 ++++++++++++++---- nexus/inventory/Cargo.toml | 1 + nexus/inventory/src/builder.rs | 249 +++++++++++- nexus/inventory/src/collector.rs | 104 +++++ nexus/inventory/src/examples.rs | 72 +++- .../tests/output/collector_basic.txt | 28 ++ .../tests/output/collector_errors.txt | 28 ++ nexus/types/src/inventory.rs | 77 ++++ schema/crdb/13.0.0/up1.sql | 4 + schema/crdb/13.0.0/up2.sql | 2 + schema/crdb/13.0.0/up3.sql | 6 + schema/crdb/13.0.0/up4.sql | 17 + 
schema/crdb/dbinit.sql | 39 +- sp-sim/src/gimlet.rs | 19 +- sp-sim/src/sidecar.rs | 19 +- 20 files changed, 1193 insertions(+), 91 deletions(-) create mode 100644 schema/crdb/13.0.0/up1.sql create mode 100644 schema/crdb/13.0.0/up2.sql create mode 100644 schema/crdb/13.0.0/up3.sql create mode 100644 schema/crdb/13.0.0/up4.sql diff --git a/Cargo.lock b/Cargo.lock index 2e0663161d..3c9c31a2ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4084,6 +4084,7 @@ name = "nexus-inventory" version = "0.1.0" dependencies = [ "anyhow", + "base64 0.21.5", "chrono", "expectorate", "gateway-client", @@ -4730,6 +4731,7 @@ dependencies = [ "tabled", "textwrap 0.16.0", "tokio", + "unicode-width", "uuid", ] diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index a8834a0b29..7544374906 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -37,6 +37,7 @@ strum.workspace = true tabled.workspace = true textwrap.workspace = true tokio = { workspace = true, features = [ "full" ] } +unicode-width.workspace = true uuid.workspace = true ipnetwork.workspace = true omicron-workspace-hack.workspace = true diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 5fa19a1a27..85c55d4e61 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -51,6 +51,7 @@ use nexus_db_model::Sled; use nexus_db_model::Snapshot; use nexus_db_model::SnapshotState; use nexus_db_model::SwCaboose; +use nexus_db_model::SwRotPage; use nexus_db_model::Vmm; use nexus_db_model::Volume; use nexus_db_model::Zpool; @@ -70,10 +71,12 @@ use nexus_types::internal_api::params::DnsRecord; use nexus_types::internal_api::params::Srv; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; +use nexus_types::inventory::RotPageWhich; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; use omicron_common::postgres_config::PostgresConfigWithUrl; use sled_agent_client::types::VolumeConstructionRequest; +use std::borrow::Cow; use std::cmp::Ordering; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -247,6 +250,8 @@ enum InventoryCommands { Cabooses, /// list and show details from particular collections Collections(CollectionsArgs), + /// list all root of trust pages ever found + RotPages, } #[derive(Debug, Args)] @@ -267,6 +272,9 @@ enum CollectionsCommands { struct CollectionsShowArgs { /// id of the collection id: Uuid, + /// show long strings in their entirety + #[clap(long)] + show_long_strings: bool, } #[derive(Debug, Args)] @@ -2233,9 +2241,25 @@ async fn cmd_db_inventory( command: CollectionsCommands::List, }) => cmd_db_inventory_collections_list(&conn, limit).await, InventoryCommands::Collections(CollectionsArgs { - command: CollectionsCommands::Show(CollectionsShowArgs { id }), + command: + CollectionsCommands::Show(CollectionsShowArgs { + id, + show_long_strings, + }), }) => { - cmd_db_inventory_collections_show(opctx, datastore, id, limit).await + let long_string_formatter = + LongStringFormatter { show_long_strings }; + cmd_db_inventory_collections_show( + opctx, + datastore, + id, + limit, + long_string_formatter, + ) + .await + } + InventoryCommands::RotPages => { + cmd_db_inventory_rot_pages(&conn, limit).await } } } @@ -2318,6 +2342,41 @@ async fn cmd_db_inventory_cabooses( Ok(()) } +async fn cmd_db_inventory_rot_pages( + conn: &DataStoreConnection<'_>, + limit: NonZeroU32, +) -> Result<(), anyhow::Error> { + #[derive(Tabled)] + #[tabled(rename_all = 
"SCREAMING_SNAKE_CASE")] + struct RotPageRow { + id: Uuid, + data_base64: String, + } + + use db::schema::sw_root_of_trust_page::dsl; + let mut rot_pages = dsl::sw_root_of_trust_page + .limit(i64::from(u32::from(limit))) + .select(SwRotPage::as_select()) + .load_async(&**conn) + .await + .context("loading rot_pages")?; + check_limit(&rot_pages, limit, || "loading rot_pages"); + rot_pages.sort(); + + let rows = rot_pages.into_iter().map(|rot_page| RotPageRow { + id: rot_page.id, + data_base64: rot_page.data_base64, + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + async fn cmd_db_inventory_collections_list( conn: &DataStoreConnection<'_>, limit: NonZeroU32, @@ -2400,6 +2459,7 @@ async fn cmd_db_inventory_collections_show( datastore: &DataStore, id: Uuid, limit: NonZeroU32, + long_string_formatter: LongStringFormatter, ) -> Result<(), anyhow::Error> { let (collection, incomplete) = datastore .inventory_collection_read_best_effort(opctx, id, limit) @@ -2411,7 +2471,7 @@ async fn cmd_db_inventory_collections_show( inv_collection_print(&collection).await?; let nerrors = inv_collection_print_errors(&collection).await?; - inv_collection_print_devices(&collection).await?; + inv_collection_print_devices(&collection, &long_string_formatter).await?; if nerrors > 0 { eprintln!( @@ -2467,6 +2527,7 @@ async fn inv_collection_print_errors( async fn inv_collection_print_devices( collection: &Collection, + long_string_formatter: &LongStringFormatter, ) -> Result<(), anyhow::Error> { // Assemble a list of baseboard ids, sorted first by device type (sled, // switch, power), then by slot number. This is the order in which we will @@ -2545,6 +2606,30 @@ async fn inv_collection_print_devices( .to_string(); println!("{}", textwrap::indent(&table.to_string(), " ")); + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct RotPageRow<'a> { + slot: String, + data_base64: Cow<'a, str>, + } + + println!(" RoT pages:"); + let rot_page_rows: Vec<_> = RotPageWhich::iter() + .filter_map(|which| { + collection.rot_page_for(which, baseboard_id).map(|d| (which, d)) + }) + .map(|(which, found_page)| RotPageRow { + slot: format!("{which:?}"), + data_base64: long_string_formatter + .maybe_truncate(&found_page.page.data_base64), + }) + .collect(); + let table = tabled::Table::new(rot_page_rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", textwrap::indent(&table.to_string(), " ")); + if let Some(rot) = rot { println!(" RoT: active slot: slot {:?}", rot.active_slot); println!( @@ -2617,3 +2702,41 @@ async fn inv_collection_print_devices( Ok(()) } + +#[derive(Debug)] +struct LongStringFormatter { + show_long_strings: bool, +} + +impl LongStringFormatter { + fn maybe_truncate<'a>(&self, s: &'a str) -> Cow<'a, str> { + use unicode_width::UnicodeWidthChar; + + // pick an arbitrary width at which we'll truncate, knowing that these + // strings are probably contained in tables with other columns + const TRUNCATE_AT_WIDTH: usize = 32; + + // quick check for short strings or if we should show long strings in + // their entirety + if self.show_long_strings || s.len() <= TRUNCATE_AT_WIDTH { + return s.into(); + } + + // longer check; we'll do the proper thing here and check the unicode + // width, and we don't really care about speed, so we can just iterate + // over chars + let mut width = 
0; + for (pos, ch) in s.char_indices() { + let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0); + if width + ch_width > TRUNCATE_AT_WIDTH { + let (prefix, _) = s.split_at(pos); + return format!("{prefix}...").into(); + } + width += ch_width; + } + + // if we didn't break out of the loop, `s` in its entirety is not too + // wide, so return it as-is + s.into() + } +} diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 5b09f289bb..d94334787d 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -6,7 +6,8 @@ use crate::schema::{ hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error, - inv_root_of_trust, inv_service_processor, sw_caboose, + inv_root_of_trust, inv_root_of_trust_page, inv_service_processor, + sw_caboose, sw_root_of_trust_page, }; use crate::{impl_enum_type, SqlU16, SqlU32}; use chrono::DateTime; @@ -18,7 +19,7 @@ use diesel::pg::Pg; use diesel::serialize::ToSql; use diesel::{serialize, sql_types}; use nexus_types::inventory::{ - BaseboardId, Caboose, Collection, PowerState, RotSlot, + BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, }; use uuid::Uuid; @@ -132,6 +133,59 @@ impl From for nexus_types::inventory::CabooseWhich { } } +// See [`nexus_types::inventory::RotPageWhich`]. +impl_enum_type!( + #[derive(SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "root_of_trust_page_which"))] + pub struct RotPageWhichEnum; + + #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] + #[diesel(sql_type = RotPageWhichEnum)] + pub enum RotPageWhich; + + // Enum values + Cmpa => b"cmpa" + CfpaActive => b"cfpa_active" + CfpaInactive => b"cfpa_inactive" + CfpaScratch => b"cfpa_scratch" +); + +impl From for RotPageWhich { + fn from(c: nexus_types::inventory::RotPageWhich) -> Self { + use nexus_types::inventory as nexus_inventory; + match c { + nexus_inventory::RotPageWhich::Cmpa => RotPageWhich::Cmpa, + nexus_inventory::RotPageWhich::CfpaActive => { + RotPageWhich::CfpaActive + } + nexus_inventory::RotPageWhich::CfpaInactive => { + RotPageWhich::CfpaInactive + } + nexus_inventory::RotPageWhich::CfpaScratch => { + RotPageWhich::CfpaScratch + } + } + } +} + +impl From for nexus_types::inventory::RotPageWhich { + fn from(row: RotPageWhich) -> Self { + use nexus_types::inventory as nexus_inventory; + match row { + RotPageWhich::Cmpa => nexus_inventory::RotPageWhich::Cmpa, + RotPageWhich::CfpaActive => { + nexus_inventory::RotPageWhich::CfpaActive + } + RotPageWhich::CfpaInactive => { + nexus_inventory::RotPageWhich::CfpaInactive + } + RotPageWhich::CfpaScratch => { + nexus_inventory::RotPageWhich::CfpaScratch + } + } + } +} + // See [`nexus_types::inventory::SpType`]. impl_enum_type!( #[derive(SqlType, Debug, QueryId)] @@ -271,6 +325,36 @@ impl From for Caboose { } } +/// See [`nexus_types::inventory::RotPage`]. +#[derive( + Queryable, + Insertable, + Clone, + Debug, + Selectable, + Eq, + PartialEq, + Ord, + PartialOrd, +)] +#[diesel(table_name = sw_root_of_trust_page)] +pub struct SwRotPage { + pub id: Uuid, + pub data_base64: String, +} + +impl From for SwRotPage { + fn from(p: RotPage) -> Self { + Self { id: Uuid::new_v4(), data_base64: p.data_base64 } + } +} + +impl From for RotPage { + fn from(row: SwRotPage) -> Self { + Self { data_base64: row.data_base64 } + } +} + /// See [`nexus_types::inventory::Collection`]. 
#[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = inv_collection_error)] @@ -441,3 +525,16 @@ pub struct InvCaboose { pub which: CabooseWhich, pub sw_caboose_id: Uuid, } + +/// See [`nexus_types::inventory::RotPageFound`]. +#[derive(Queryable, Clone, Debug, Selectable)] +#[diesel(table_name = inv_root_of_trust_page)] +pub struct InvRotPage { + pub inv_collection_id: Uuid, + pub hw_baseboard_id: Uuid, + pub time_collected: DateTime, + pub source: String, + + pub which: RotPageWhich, + pub sw_root_of_trust_page_id: Uuid, +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 960b53873a..7f7dd57027 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1187,6 +1187,13 @@ table! { } } +table! { + sw_root_of_trust_page (id) { + id -> Uuid, + data_base64 -> Text, + } +} + table! { inv_collection (id) { id -> Uuid, @@ -1248,6 +1255,18 @@ table! { } } +table! { + inv_root_of_trust_page (inv_collection_id, hw_baseboard_id, which) { + inv_collection_id -> Uuid, + hw_baseboard_id -> Uuid, + time_collected -> Timestamptz, + source -> Text, + + which -> crate::RotPageWhichEnum, + sw_root_of_trust_page_id -> Uuid, + } +} + table! { bootstore_keys (key, generation) { key -> Text, @@ -1270,7 +1289,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(12, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(13, 0, 0); allow_tables_to_appear_in_same_query!( system_update, @@ -1285,6 +1304,11 @@ joinable!(ip_pool_range -> ip_pool (ip_pool_id)); allow_tables_to_appear_in_same_query!(inv_collection, inv_collection_error); joinable!(inv_collection_error -> inv_collection (inv_collection_id)); allow_tables_to_appear_in_same_query!(hw_baseboard_id, sw_caboose, inv_caboose); +allow_tables_to_appear_in_same_query!( + hw_baseboard_id, + sw_root_of_trust_page, + inv_root_of_trust_page +); allow_tables_to_appear_in_same_query!( dataset, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index b743d28ee8..28a438629e 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -35,10 +35,13 @@ use nexus_db_model::InvCaboose; use nexus_db_model::InvCollection; use nexus_db_model::InvCollectionError; use nexus_db_model::InvRootOfTrust; +use nexus_db_model::InvRotPage; use nexus_db_model::InvServiceProcessor; +use nexus_db_model::RotPageWhichEnum; use nexus_db_model::SpType; use nexus_db_model::SpTypeEnum; use nexus_db_model::SwCaboose; +use nexus_db_model::SwRotPage; use nexus_types::inventory::Collection; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; @@ -76,6 +79,11 @@ impl DataStore { .iter() .map(|s| SwCaboose::from((**s).clone())) .collect::>(); + let rot_pages = collection + .rot_pages + .iter() + .map(|p| SwRotPage::from((**p).clone())) + .collect::>(); let error_values = collection .errors .iter() @@ -140,6 +148,19 @@ impl DataStore { .await?; } + // Insert records (and generate ids) for each distinct RoT page that + // we've found. Like baseboards, these might already be present and + // rows in this table are not scoped to a particular collection + // because they only map (immutable) identifiers to UUIDs. 
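+ // (Illustrative, not part of this change: with `on_conflict_do_nothing`, + // the statement Diesel renders is roughly "INSERT INTO + // sw_root_of_trust_page (id, data_base64) VALUES (...) ON CONFLICT DO + // NOTHING", so re-collecting a page that already has a row is a no-op.)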
+ { + use db::schema::sw_root_of_trust_page::dsl; + let _ = diesel::insert_into(dsl::sw_root_of_trust_page) + .values(rot_pages) + .on_conflict_do_nothing() + .execute_async(&conn) + .await?; + } + // Insert a record describing the collection itself. { use db::schema::inv_collection::dsl; @@ -468,6 +489,85 @@ impl DataStore { } } + // Insert rows for the root of trust pages that we found. This is + // almost identical to inserting cabooses above, and just like for + // cabooses, we do this using INSERT INTO ... SELECT. We have these + // three tables: + // + // - `hw_baseboard` with an "id" primary key and lookup columns + // "part_number" and "serial_number" + // - `sw_root_of_trust_page` with an "id" primary key and lookup + // column "data_base64" + // - `inv_root_of_trust_page` with foreign keys "hw_baseboard_id", + // "sw_root_of_trust_page_id", and various other columns + // + // and generate an INSERT INTO query that is structurally the same + // as the caboose query described above. + for (which, tree) in &collection.rot_pages_found { + use db::schema::hw_baseboard_id::dsl as dsl_baseboard_id; + use db::schema::inv_root_of_trust_page::dsl as dsl_inv_rot_page; + use db::schema::sw_root_of_trust_page::dsl as dsl_sw_rot_page; + let db_which = nexus_db_model::RotPageWhich::from(*which); + for (baseboard_id, found_rot_page) in tree { + let selection = db::schema::hw_baseboard_id::table + .inner_join( + db::schema::sw_root_of_trust_page::table.on( + dsl_baseboard_id::part_number + .eq(baseboard_id.part_number.clone()) + .and( + dsl_baseboard_id::serial_number.eq( + baseboard_id.serial_number.clone(), + ), + ) + .and(dsl_sw_rot_page::data_base64.eq( + found_rot_page.page.data_base64.clone(), + )), + ), + ) + .select(( + dsl_baseboard_id::id, + dsl_sw_rot_page::id, + collection_id.into_sql::(), + found_rot_page + .time_collected + .into_sql::(), + found_rot_page + .source + .clone() + .into_sql::(), + db_which.into_sql::(), + )); + + let _ = diesel::insert_into( + db::schema::inv_root_of_trust_page::table, + ) + .values(selection) + .into_columns(( + dsl_inv_rot_page::hw_baseboard_id, + dsl_inv_rot_page::sw_root_of_trust_page_id, + dsl_inv_rot_page::inv_collection_id, + dsl_inv_rot_page::time_collected, + dsl_inv_rot_page::source, + dsl_inv_rot_page::which, + )) + .execute_async(&conn) + .await?; + + // See the comments above. The same applies here. If you + // update the statement below because the schema for + // `inv_root_of_trust_page` has changed, be sure to update + // the code above, too! + let ( + _hw_baseboard_id, + _sw_root_of_trust_page_id, + _inv_collection_id, + _time_collected, + _source, + _which, + ) = dsl_inv_rot_page::inv_root_of_trust_page::all_columns(); + } + } + // Finally, insert the list of errors. { use db::schema::inv_collection_error::dsl as errors_dsl; @@ -720,7 +820,7 @@ impl DataStore { // start removing it and we'd also need to make sure we didn't leak a // collection if we crash while deleting it. let conn = self.pool_connection_authorized(opctx).await?; - let (ncollections, nsps, nrots, ncabooses, nerrors) = conn + let (ncollections, nsps, nrots, ncabooses, nrot_pages, nerrors) = conn .transaction_async(|conn| async move { // Remove the record describing the collection itself. let ncollections = { @@ -729,7 +829,7 @@ impl DataStore { dsl::inv_collection.filter(dsl::id.eq(collection_id)), ) .execute_async(&conn) - .await?; + .await? }; // Remove rows for service processors. 
@@ -740,7 +840,7 @@ .filter(dsl::inv_collection_id.eq(collection_id)), ) .execute_async(&conn) - .await?; + .await? }; // Remove rows for roots of trust. @@ -751,7 +851,7 @@ .filter(dsl::inv_collection_id.eq(collection_id)), ) .execute_async(&conn) - .await?; + .await? }; // Remove rows for cabooses found. @@ -762,7 +862,18 @@ .filter(dsl::inv_collection_id.eq(collection_id)), ) .execute_async(&conn) - .await?; + .await? + }; + + // Remove rows for root of trust pages found. + let nrot_pages = { + use db::schema::inv_root_of_trust_page::dsl; + diesel::delete( + dsl::inv_root_of_trust_page + .filter(dsl::inv_collection_id.eq(collection_id)), + ) + .execute_async(&conn) + .await? }; // Remove rows for errors encountered. @@ -773,10 +884,10 @@ .filter(dsl::inv_collection_id.eq(collection_id)), ) .execute_async(&conn) - .await?; + .await? }; - Ok((ncollections, nsps, nrots, ncabooses, nerrors)) + Ok((ncollections, nsps, nrots, ncabooses, nrot_pages, nerrors)) }) .await .map_err(|error| match error { @@ -792,6 +903,7 @@ "nsps" => nsps, "nrots" => nrots, "ncabooses" => ncabooses, + "nrot_pages" => nrot_pages, "nerrors" => nerrors, ); @@ -1068,6 +1180,88 @@ impl DataStore { ); } + // Fetch records of RoT pages found. + let inv_rot_page_rows = { + use db::schema::inv_root_of_trust_page::dsl; + dsl::inv_root_of_trust_page + .filter(dsl::inv_collection_id.eq(id)) + .limit(sql_limit) + .select(InvRotPage::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })? + }; + limit_reached = limit_reached || inv_rot_page_rows.len() == usize_limit; + + // Collect the unique sw_rot_page_ids for those pages. + let sw_rot_page_ids: BTreeSet<_> = inv_rot_page_rows + .iter() + .map(|inv_rot_page| inv_rot_page.sw_root_of_trust_page_id) + .collect(); + // Fetch the corresponding records. + let rot_pages_by_id: BTreeMap<_, _> = { + use db::schema::sw_root_of_trust_page::dsl; + dsl::sw_root_of_trust_page + .filter(dsl::id.eq_any(sw_rot_page_ids)) + .limit(sql_limit) + .select(SwRotPage::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? + .into_iter() + .map(|sw_rot_page_row| { + ( + sw_rot_page_row.id, + Arc::new(nexus_types::inventory::RotPage::from( + sw_rot_page_row, + )), + ) + }) + .collect() + }; + limit_reached = limit_reached || rot_pages_by_id.len() == usize_limit; + + // Assemble the lists of rot pages found.
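+ // The map built here is keyed by which page (Cmpa, CfpaActive, ...) and + // then by baseboard, mirroring the structure of `cabooses_found` above.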
+ let mut rot_pages_found = BTreeMap::new(); + for p in inv_rot_page_rows { + let by_baseboard = rot_pages_found + .entry(nexus_types::inventory::RotPageWhich::from(p.which)) + .or_insert_with(BTreeMap::new); + let Some(bb) = baseboards_by_id.get(&p.hw_baseboard_id) else { + let msg = format!( + "unknown baseboard found in inv_root_of_trust_page: {}", + p.hw_baseboard_id + ); + return Err(Error::internal_error(&msg)); + }; + let Some(sw_rot_page) = + rot_pages_by_id.get(&p.sw_root_of_trust_page_id) + else { + let msg = format!( + "unknown rot page found in inv_root_of_trust_page: {}", + p.sw_root_of_trust_page_id + ); + return Err(Error::internal_error(&msg)); + }; + + let previous = by_baseboard.insert( + bb.clone(), + nexus_types::inventory::RotPageFound { + time_collected: p.time_collected, + source: p.source, + page: sw_rot_page.clone(), + }, + ); + bail_unless!( + previous.is_none(), + "duplicate rot page found: {:?} baseboard {:?}", + p.which, + p.hw_baseboard_id + ); + } + Ok(( Collection { id, @@ -1077,9 +1271,11 @@ impl DataStore { collector, baseboards: baseboards_by_id.values().cloned().collect(), cabooses: cabooses_by_id.values().cloned().collect(), + rot_pages: rot_pages_by_id.values().cloned().collect(), sps, rots, cabooses_found, + rot_pages_found, }, limit_reached, )) @@ -1141,6 +1337,7 @@ mod test { use nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; + use nexus_types::inventory::RotPageWhich; use omicron_test_utils::dev; use std::num::NonZeroU32; use uuid::Uuid; @@ -1156,28 +1353,44 @@ mod test { .await?) } - async fn count_baseboards_cabooses( - conn: &DataStoreConnection<'_>, - ) -> anyhow::Result<(usize, usize)> { - conn.transaction_async(|conn| async move { - conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); - let bb_count = schema::hw_baseboard_id::dsl::hw_baseboard_id - .select(diesel::dsl::count_star()) - .first_async::(&conn) - .await - .context("failed to count baseboards")?; - let caboose_count = schema::sw_caboose::dsl::sw_caboose - .select(diesel::dsl::count_star()) - .first_async::(&conn) - .await - .context("failed to count cabooses")?; - let bb_count_usize = usize::try_from(bb_count) - .context("failed to convert baseboard count to usize")?; - let caboose_count_usize = usize::try_from(caboose_count) - .context("failed to convert caboose count to usize")?; - Ok((bb_count_usize, caboose_count_usize)) - }) - .await + struct CollectionCounts { + baseboards: usize, + cabooses: usize, + rot_pages: usize, + } + + impl CollectionCounts { + async fn new(conn: &DataStoreConnection<'_>) -> anyhow::Result { + conn.transaction_async(|conn| async move { + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL) + .await + .unwrap(); + let bb_count = schema::hw_baseboard_id::dsl::hw_baseboard_id + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .context("failed to count baseboards")?; + let caboose_count = schema::sw_caboose::dsl::sw_caboose + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .context("failed to count cabooses")?; + let rot_page_count = + schema::sw_root_of_trust_page::dsl::sw_root_of_trust_page + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .context("failed to count rot pages")?; + let baseboards = usize::try_from(bb_count) + .context("failed to convert baseboard count to usize")?; + let cabooses = usize::try_from(caboose_count) + .context("failed to convert caboose count to usize")?; + let 
rot_pages = usize::try_from(rot_page_count) + .context("failed to convert rot page count to usize")?; + Ok(Self { baseboards, cabooses, rot_pages }) + }) + .await + } } /// Tests inserting several collections, reading them back, and making sure @@ -1205,14 +1418,15 @@ mod test { .expect("failed to read collection back"); assert_eq!(collection1, collection_read); - // There ought to be no baseboards or cabooses in the databases from - // that collection. + // There ought to be no baseboards, cabooses, or RoT pages in the + // databases from that collection. assert_eq!(collection1.baseboards.len(), 0); assert_eq!(collection1.cabooses.len(), 0); - let (nbaseboards, ncabooses) = - count_baseboards_cabooses(&conn).await.unwrap(); - assert_eq!(collection1.baseboards.len(), nbaseboards); - assert_eq!(collection1.cabooses.len(), ncabooses); + assert_eq!(collection1.rot_pages.len(), 0); + let coll_counts = CollectionCounts::new(&conn).await.unwrap(); + assert_eq!(collection1.baseboards.len(), coll_counts.baseboards); + assert_eq!(collection1.cabooses.len(), coll_counts.cabooses); + assert_eq!(collection1.rot_pages.len(), coll_counts.rot_pages); // Now insert a more complex collection, write it to the database, and // read it back. @@ -1227,14 +1441,16 @@ mod test { .await .expect("failed to read collection back"); assert_eq!(collection2, collection_read); - // Verify that we have exactly the set of cabooses and baseboards in the - // databases that came from this first non-empty collection. + // Verify that we have exactly the set of cabooses, baseboards, and RoT + // pages in the databases that came from this first non-empty + // collection. assert_ne!(collection2.baseboards.len(), collection1.baseboards.len()); assert_ne!(collection2.cabooses.len(), collection1.cabooses.len()); - let (nbaseboards, ncabooses) = - count_baseboards_cabooses(&conn).await.unwrap(); - assert_eq!(collection2.baseboards.len(), nbaseboards); - assert_eq!(collection2.cabooses.len(), ncabooses); + assert_ne!(collection2.rot_pages.len(), collection1.rot_pages.len()); + let coll_counts = CollectionCounts::new(&conn).await.unwrap(); + assert_eq!(collection2.baseboards.len(), coll_counts.baseboards); + assert_eq!(collection2.cabooses.len(), coll_counts.cabooses); + assert_eq!(collection2.rot_pages.len(), coll_counts.rot_pages); // Check that we get an error on the limit being reached for // `read_all_or_nothing` @@ -1249,9 +1465,9 @@ mod test { .is_err()); // Now insert an equivalent collection again. Verify the distinct - // baseboards and cabooses again. This is important: the insertion - // process should re-use the baseboards and cabooses from the previous - // collection. + // baseboards, cabooses, and RoT pages again. This is important: the + // insertion process should re-use the baseboards, cabooses, and RoT + // pages from the previous collection. let Representative { builder, .. } = representative(); let collection3 = builder.build(); datastore @@ -1263,18 +1479,19 @@ mod test { .await .expect("failed to read collection back"); assert_eq!(collection3, collection_read); - // Verify that we have the same number of cabooses and baseboards, since - // those didn't change. + // Verify that we have the same number of cabooses, baseboards, and RoT + // pages, since those didn't change. 
assert_eq!(collection3.baseboards.len(), collection2.baseboards.len()); assert_eq!(collection3.cabooses.len(), collection2.cabooses.len()); - let (nbaseboards, ncabooses) = - count_baseboards_cabooses(&conn).await.unwrap(); - assert_eq!(collection3.baseboards.len(), nbaseboards); - assert_eq!(collection3.cabooses.len(), ncabooses); + assert_eq!(collection3.rot_pages.len(), collection2.rot_pages.len()); + let coll_counts = CollectionCounts::new(&conn).await.unwrap(); + assert_eq!(collection3.baseboards.len(), coll_counts.baseboards); + assert_eq!(collection3.cabooses.len(), coll_counts.cabooses); + assert_eq!(collection3.rot_pages.len(), coll_counts.rot_pages); // Now insert a collection that's almost equivalent, but has an extra - // couple of baseboards and caboose. Verify that we re-use the existing - // ones, but still insert the new ones. + // couple of baseboards, one caboose, and one RoT page. Verify that we + // re-use the existing ones, but still insert the new ones. let Representative { mut builder, .. } = representative(); builder.found_sp_state( "test suite", @@ -1298,6 +1515,14 @@ mod test { nexus_inventory::examples::caboose("dummy"), ) .unwrap(); + builder + .found_rot_page( + &bb, + RotPageWhich::Cmpa, + "dummy", + nexus_inventory::examples::rot_page("dummy"), + ) + .unwrap(); let collection4 = builder.build(); datastore .inventory_insert_collection(&opctx, &collection4) @@ -1313,14 +1538,15 @@ mod test { collection4.baseboards.len(), collection3.baseboards.len() + 2 ); + assert_eq!(collection4.cabooses.len(), collection3.cabooses.len() + 1); assert_eq!( - collection4.cabooses.len(), - collection3.baseboards.len() + 1 + collection4.rot_pages.len(), + collection3.rot_pages.len() + 1 ); - let (nbaseboards, ncabooses) = - count_baseboards_cabooses(&conn).await.unwrap(); - assert_eq!(collection4.baseboards.len(), nbaseboards); - assert_eq!(collection4.cabooses.len(), ncabooses); + let coll_counts = CollectionCounts::new(&conn).await.unwrap(); + assert_eq!(collection4.baseboards.len(), coll_counts.baseboards); + assert_eq!(collection4.cabooses.len(), coll_counts.cabooses); + assert_eq!(collection4.rot_pages.len(), coll_counts.rot_pages); // This time, go back to our earlier collection. This logically removes // some baseboards. They should still be present in the database, but @@ -1338,12 +1564,14 @@ mod test { assert_eq!(collection5, collection_read); assert_eq!(collection5.baseboards.len(), collection3.baseboards.len()); assert_eq!(collection5.cabooses.len(), collection3.cabooses.len()); + assert_eq!(collection5.rot_pages.len(), collection3.rot_pages.len()); assert_ne!(collection5.baseboards.len(), collection4.baseboards.len()); assert_ne!(collection5.cabooses.len(), collection4.cabooses.len()); - let (nbaseboards, ncabooses) = - count_baseboards_cabooses(&conn).await.unwrap(); - assert_eq!(collection4.baseboards.len(), nbaseboards); - assert_eq!(collection4.cabooses.len(), ncabooses); + assert_ne!(collection5.rot_pages.len(), collection4.rot_pages.len()); + let coll_counts = CollectionCounts::new(&conn).await.unwrap(); + assert_eq!(collection4.baseboards.len(), coll_counts.baseboards); + assert_eq!(collection4.cabooses.len(), coll_counts.cabooses); + assert_eq!(collection4.rot_pages.len(), coll_counts.rot_pages); // Try to insert the same collection again and make sure it fails. let error = datastore @@ -1536,10 +1764,10 @@ mod test { .expect("failed to check that tables were empty"); // We currently keep the baseboard ids and sw_cabooses around. 
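// (These are normalization tables; their rows are not scoped to any one collection, so pruning a collection deliberately leaves them in place.)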
- let (nbaseboards, ncabooses) = - count_baseboards_cabooses(&conn).await.unwrap(); - assert_ne!(nbaseboards, 0); - assert_ne!(ncabooses, 0); + let coll_counts = CollectionCounts::new(&conn).await.unwrap(); + assert_ne!(coll_counts.baseboards, 0); + assert_ne!(coll_counts.cabooses, 0); + assert_ne!(coll_counts.rot_pages, 0); // Clean up. db.cleanup().await.unwrap(); diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 202aff49b2..6bb63cf9f7 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -6,6 +6,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +base64.workspace = true chrono.workspace = true gateway-client.workspace = true gateway-messages.workspace = true diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index ad008ee4df..188a48b553 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -19,6 +19,9 @@ use nexus_types::inventory::Caboose; use nexus_types::inventory::CabooseFound; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; +use nexus_types::inventory::RotPage; +use nexus_types::inventory::RotPageFound; +use nexus_types::inventory::RotPageWhich; use nexus_types::inventory::RotState; use nexus_types::inventory::ServiceProcessor; use std::collections::BTreeMap; @@ -39,10 +42,13 @@ pub struct CollectionBuilder { collector: String, baseboards: BTreeSet>, cabooses: BTreeSet>, + rot_pages: BTreeSet>, sps: BTreeMap, ServiceProcessor>, rots: BTreeMap, RotState>, cabooses_found: BTreeMap, CabooseFound>>, + rot_pages_found: + BTreeMap, RotPageFound>>, } impl CollectionBuilder { @@ -58,9 +64,11 @@ impl CollectionBuilder { collector: collector.to_owned(), baseboards: BTreeSet::new(), cabooses: BTreeSet::new(), + rot_pages: BTreeSet::new(), sps: BTreeMap::new(), rots: BTreeMap::new(), cabooses_found: BTreeMap::new(), + rot_pages_found: BTreeMap::new(), } } @@ -78,9 +86,11 @@ impl CollectionBuilder { collector: self.collector, baseboards: self.baseboards, cabooses: self.cabooses, + rot_pages: self.rot_pages, sps: self.sps, rots: self.rots, cabooses_found: self.cabooses_found, + rot_pages_found: self.rot_pages_found, } } @@ -251,6 +261,75 @@ impl CollectionBuilder { } } + /// Returns true if we already found the root of trust page for `which` for + /// baseboard `baseboard` + /// + /// This is used to avoid requesting it multiple times (from multiple MGS + /// instances). + pub fn found_rot_page_already( + &self, + baseboard: &BaseboardId, + which: RotPageWhich, + ) -> bool { + self.rot_pages_found + .get(&which) + .map(|map| map.contains_key(baseboard)) + .unwrap_or(false) + } + + /// Record the given root of trust page found for the given baseboard + /// + /// The baseboard must previously have been reported using + /// `found_sp_state()`. + /// + /// `source` is an arbitrary string for debugging that describes the MGS + /// that reported this data (generally a URL string). + pub fn found_rot_page( + &mut self, + baseboard: &BaseboardId, + which: RotPageWhich, + source: &str, + page: RotPage, + ) -> Result<(), anyhow::Error> { + // Normalize the page contents: i.e., if we've seen this exact page + // before, use the same record from before. Otherwise, make a new one. 
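+ // (`normalize_item` returns an `Arc`, so a page value reported for + // several baseboards is stored once in `self.rot_pages` and shared by + // reference.)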
+ let sw_rot_page = Self::normalize_item(&mut self.rot_pages, page); + let (baseboard, _) = + self.sps.get_key_value(baseboard).ok_or_else(|| { + anyhow!( + "reporting rot page for unknown baseboard: {:?} ({:?})", + baseboard, + sw_rot_page + ) + })?; + let by_id = self.rot_pages_found.entry(which).or_default(); + if let Some(previous) = by_id.insert( + baseboard.clone(), + RotPageFound { + time_collected: now(), + source: source.to_owned(), + page: sw_rot_page.clone(), + }, + ) { + let error = if *previous.page == *sw_rot_page { + anyhow!("reported multiple times (same value)",) + } else { + anyhow!( + "reported rot page multiple times (previously {:?}, \ + now {:?})", + previous, + sw_rot_page + ) + }; + Err(error.context(format!( + "baseboard {:?} rot page {:?}", + baseboard, which + ))) + } else { + Ok(()) + } + } + /// Helper function for normalizing items /// /// If `item` (or its equivalent) is not already in `items`, insert it. @@ -301,6 +380,8 @@ mod test { use crate::examples::representative; use crate::examples::sp_state; use crate::examples::Representative; + use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; + use base64::Engine; use gateway_client::types::PowerState; use gateway_client::types::RotSlot; use gateway_client::types::RotState; @@ -310,6 +391,8 @@ mod test { use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Caboose; use nexus_types::inventory::CabooseWhich; + use nexus_types::inventory::RotPage; + use nexus_types::inventory::RotPageWhich; // Verify the contents of an empty collection. #[test] @@ -326,9 +409,11 @@ mod test { assert_eq!(collection.collector, "test_empty"); assert!(collection.baseboards.is_empty()); assert!(collection.cabooses.is_empty()); + assert!(collection.rot_pages.is_empty()); assert!(collection.sps.is_empty()); assert!(collection.rots.is_empty()); assert!(collection.cabooses_found.is_empty()); + assert!(collection.rot_pages_found.is_empty()); } // Simple test of a single, fairly typical collection that contains just @@ -428,6 +513,33 @@ mod test { } assert!(collection.cabooses.contains(&common_caboose)); + // Verify the common RoT page data. + let common_rot_page_baseboards = [&sled1_bb, &sled3_bb, &switch]; + let common_rot_page = nexus_types::inventory::RotPage { + // base64("1") == "MQ==" + data_base64: "MQ==".to_string(), + }; + for bb in &common_rot_page_baseboards { + let _ = collection.sps.get(*bb).unwrap(); + let p0 = collection.rot_page_for(RotPageWhich::Cmpa, bb).unwrap(); + let p1 = + collection.rot_page_for(RotPageWhich::CfpaActive, bb).unwrap(); + let p2 = collection + .rot_page_for(RotPageWhich::CfpaInactive, bb) + .unwrap(); + let p3 = + collection.rot_page_for(RotPageWhich::CfpaScratch, bb).unwrap(); + assert_eq!(p0.source, "test suite"); + assert_eq!(*p0.page, common_rot_page); + assert_eq!(p1.source, "test suite"); + assert_eq!(*p1.page, common_rot_page); + assert_eq!(p2.source, "test suite"); + assert_eq!(*p2.page, common_rot_page); + assert_eq!(p3.source, "test suite"); + assert_eq!(*p3.page, common_rot_page); + } + assert!(collection.rot_pages.contains(&common_rot_page)); + // Verify the specific, different data for the healthy SPs and RoTs that // we reported. 
// sled1 @@ -474,6 +586,20 @@ mod test { ); assert_eq!(rot.transient_boot_preference, Some(RotSlot::B)); + // sled 2 did not have any RoT pages reported + assert!(collection + .rot_page_for(RotPageWhich::Cmpa, &sled2_bb) + .is_none()); + assert!(collection + .rot_page_for(RotPageWhich::CfpaActive, &sled2_bb) + .is_none()); + assert!(collection + .rot_page_for(RotPageWhich::CfpaInactive, &sled2_bb) + .is_none()); + assert!(collection + .rot_page_for(RotPageWhich::CfpaScratch, &sled2_bb) + .is_none()); + // switch let sp = collection.sps.get(&switch).unwrap(); assert_eq!(sp.source, "fake MGS 2"); @@ -544,6 +670,38 @@ mod test { assert!(collection.cabooses.contains(c)); assert_eq!(c.board, "board_psc_rot_b"); + // The PSC also has four different RoT pages! + let p = + &collection.rot_page_for(RotPageWhich::Cmpa, &psc).unwrap().page; + assert_eq!( + BASE64_STANDARD.decode(&p.data_base64).unwrap(), + b"psc cmpa" + ); + let p = &collection + .rot_page_for(RotPageWhich::CfpaActive, &psc) + .unwrap() + .page; + assert_eq!( + BASE64_STANDARD.decode(&p.data_base64).unwrap(), + b"psc cfpa active" + ); + let p = &collection + .rot_page_for(RotPageWhich::CfpaInactive, &psc) + .unwrap() + .page; + assert_eq!( + BASE64_STANDARD.decode(&p.data_base64).unwrap(), + b"psc cfpa inactive" + ); + let p = &collection + .rot_page_for(RotPageWhich::CfpaScratch, &psc) + .unwrap() + .page; + assert_eq!( + BASE64_STANDARD.decode(&p.data_base64).unwrap(), + b"psc cfpa scratch" + ); + // Verify the reported SP state for sled3, which did not have a healthy // RoT, nor any cabooses. let sp = collection.sps.get(&sled3_bb).unwrap(); @@ -565,8 +723,9 @@ mod test { assert_eq!(collection.sps.len(), collection.rots.len() + 1); // There should be five cabooses: the four used for the PSC (see above), - // plus the common one. + // plus the common one; same for RoT pages. assert_eq!(collection.cabooses.len(), 5); + assert_eq!(collection.rot_pages.len(), 5); } // Exercises all the failure cases that shouldn't happen in real systems. 
@@ -704,7 +863,7 @@ mod test { assert_eq!(error.to_string(), error2.to_string(),); // report the same caboose twice with the same contents - let _ = builder + builder .found_caboose( &sled1_bb, CabooseWhich::SpSlot0, @@ -747,12 +906,74 @@ mod test { )); assert!(message.contains(", now ")); + // report RoT page for an unknown baseboard + let rot_page1 = RotPage { data_base64: "page1".to_string() }; + let rot_page2 = RotPage { data_base64: "page2".to_string() }; + assert!(!builder + .found_rot_page_already(&bogus_baseboard, RotPageWhich::Cmpa)); + let error = builder + .found_rot_page( + &bogus_baseboard, + RotPageWhich::Cmpa, + "dummy", + rot_page1.clone(), + ) + .unwrap_err(); + assert_eq!( + error.to_string(), + "reporting rot page for unknown baseboard: \ + BaseboardId { part_number: \"p1\", serial_number: \"bogus\" } \ + (RotPage { data_base64: \"page1\" })" + ); + assert!(!builder + .found_rot_page_already(&bogus_baseboard, RotPageWhich::Cmpa)); + + // report the same rot page twice with the same contents + builder + .found_rot_page( + &sled1_bb, + RotPageWhich::Cmpa, + "dummy", + rot_page1.clone(), + ) + .unwrap(); + let error = builder + .found_rot_page( + &sled1_bb, + RotPageWhich::Cmpa, + "dummy", + rot_page1.clone(), + ) + .unwrap_err(); + assert_eq!( + format!("{:#}", error), + "baseboard BaseboardId { part_number: \"model1\", \ + serial_number: \"s1\" } rot page Cmpa: reported multiple \ + times (same value)" + ); + // report the same rot page again with different contents + let error = builder + .found_rot_page( + &sled1_bb, + RotPageWhich::Cmpa, + "dummy", + rot_page2.clone(), + ) + .unwrap_err(); + let message = format!("{:#}", error); + println!("found error: {}", message); + assert!(message.contains( + "rot page Cmpa: reported rot page multiple times (previously" + )); + assert!(message.contains(", now RotPage { data_base64: \"page2\" }")); + // We should still get a valid collection. let collection = builder.build(); println!("{:#?}", collection); assert_eq!(collection.collector, "test_problems"); - // We should still have the one sled and its SP slot0 caboose. + // We should still have the one sled, its SP slot0 caboose, and its Cmpa + // RoT page. assert!(collection.baseboards.contains(&sled1_bb)); let _ = collection.sps.get(&sled1_bb).unwrap(); let caboose = @@ -769,6 +990,28 @@ mod test { assert!(collection .caboose_for(CabooseWhich::RotSlotB, &sled1_bb) .is_none()); + let rot_page = + collection.rot_page_for(RotPageWhich::Cmpa, &sled1_bb).unwrap(); + assert!(collection.rot_pages.contains(&rot_page.page)); + + // TODO-correctness Is this test correct? We reported the same RoT page + // with different data (rot_page1, then rot_page2). The second + // `found_rot_page` returned an error, but we overwrote the original + // data and did not record the error in `collection.errors`. Should we + // either have kept the original data or returned Ok while returning an + // error? It seems a little strange we returned Err but accepted the new + // data. + assert_eq!(rot_page.page.data_base64, rot_page2.data_base64); + + assert!(collection + .rot_page_for(RotPageWhich::CfpaActive, &sled1_bb) + .is_none()); + assert!(collection + .rot_page_for(RotPageWhich::CfpaInactive, &sled1_bb) + .is_none()); + assert!(collection + .rot_page_for(RotPageWhich::CfpaScratch, &sled1_bb) + .is_none()); // We should see an error. 
        assert_eq!(
diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs
index 1676f44083..7c6570436a 100644
--- a/nexus/inventory/src/collector.rs
+++ b/nexus/inventory/src/collector.rs
@@ -6,8 +6,13 @@
 use crate::builder::CollectionBuilder;
 use anyhow::Context;
+use gateway_client::types::GetCfpaParams;
+use gateway_client::types::RotCfpaSlot;
+use gateway_messages::SpComponent;
 use nexus_types::inventory::CabooseWhich;
 use nexus_types::inventory::Collection;
+use nexus_types::inventory::RotPage;
+use nexus_types::inventory::RotPageWhich;
 use slog::{debug, error};
 use std::sync::Arc;
 use strum::IntoEnumIterator;
 
@@ -195,6 +200,84 @@ impl Collector {
                    );
                }
            }
+
+            // For each kind of RoT page that we care about, if it hasn't been
+            // fetched already, fetch it and record it. Generally, we'd only
+            // get here for the first MGS client. Assuming that one succeeds,
+            // the other(s) will skip this loop.
+            for which in RotPageWhich::iter() {
+                if self.in_progress.found_rot_page_already(&baseboard_id, which)
+                {
+                    continue;
+                }
+
+                let component = SpComponent::ROT.const_as_str();
+
+                let result = match which {
+                    RotPageWhich::Cmpa => client
+                        .sp_rot_cmpa_get(sp.type_, sp.slot, component)
+                        .await
+                        .map(|response| response.into_inner().base64_data),
+                    RotPageWhich::CfpaActive => client
+                        .sp_rot_cfpa_get(
+                            sp.type_,
+                            sp.slot,
+                            component,
+                            &GetCfpaParams { slot: RotCfpaSlot::Active },
+                        )
+                        .await
+                        .map(|response| response.into_inner().base64_data),
+                    RotPageWhich::CfpaInactive => client
+                        .sp_rot_cfpa_get(
+                            sp.type_,
+                            sp.slot,
+                            component,
+                            &GetCfpaParams { slot: RotCfpaSlot::Inactive },
+                        )
+                        .await
+                        .map(|response| response.into_inner().base64_data),
+                    RotPageWhich::CfpaScratch => client
+                        .sp_rot_cfpa_get(
+                            sp.type_,
+                            sp.slot,
+                            component,
+                            &GetCfpaParams { slot: RotCfpaSlot::Scratch },
+                        )
+                        .await
+                        .map(|response| response.into_inner().base64_data),
+                }
+                .with_context(|| {
+                    format!(
+                        "MGS {:?}: SP {:?}: rot page {:?}",
+                        client.baseurl(),
+                        sp,
+                        which
+                    )
+                });
+
+                let page = match result {
+                    Err(error) => {
+                        self.in_progress.found_error(error);
+                        continue;
+                    }
+                    Ok(data_base64) => RotPage { data_base64 },
+                };
+                if let Err(error) = self.in_progress.found_rot_page(
+                    &baseboard_id,
+                    which,
+                    client.baseurl(),
+                    page,
+                ) {
+                    error!(
+                        &self.log,
+                        "error reporting rot page: {:?} {:?} {:?}: {:#}",
+                        baseboard_id,
+                        which,
+                        client.baseurl(),
+                        error
+                    );
+                }
+            }
        }
    }
 }
@@ -236,6 +319,11 @@ mod test {
            .unwrap();
        }
 
+        write!(&mut s, "\nrot pages:\n").unwrap();
+        for p in &collection.rot_pages {
+            write!(&mut s, "    data_base64 {:?}\n", p.data_base64).unwrap();
+        }
+
         // All we really need to check here is that we're reporting the right
         // SPs, RoTs, and cabooses. The actual SP data, RoT data, and caboose
         // data comes straight from MGS. And proper handling of that data is
@@ -272,6 +360,22 @@ mod test {
            }
        }
 
+        write!(&mut s, "\nrot pages found:\n").unwrap();
+        for (kind, bb_to_found) in &collection.rot_pages_found {
+            for (bb, found) in bb_to_found {
+                write!(
+                    &mut s,
+                    "    {:?} baseboard part {:?} serial {:?}: \
+                    data_base64 {:?}\n",
+                    kind,
+                    bb.part_number,
+                    bb.serial_number,
+                    found.page.data_base64
+                )
+                .unwrap();
+            }
+        }
+
         write!(&mut s, "\nerrors:\n").unwrap();
         for e in &collection.errors {
             // Some error strings have OS error numbers in them. We want to
diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs
index 52aca397bb..0ce3712942 100644
--- a/nexus/inventory/src/examples.rs
+++ b/nexus/inventory/src/examples.rs
@@ -13,6 +13,8 @@ use gateway_client::types::SpState;
 use gateway_client::types::SpType;
 use nexus_types::inventory::BaseboardId;
 use nexus_types::inventory::CabooseWhich;
+use nexus_types::inventory::RotPage;
+use nexus_types::inventory::RotPageWhich;
 use std::sync::Arc;
 use strum::IntoEnumIterator;
@@ -164,7 +166,7 @@
     for bb in &common_caboose_baseboards {
         for which in CabooseWhich::iter() {
             assert!(!builder.found_caboose_already(bb, which));
-            let _ = builder
+            builder
                 .found_caboose(bb, which, "test suite", caboose("1"))
                 .unwrap();
             assert!(builder.found_caboose_already(bb, which));
@@ -174,7 +176,7 @@
     // For the PSC, use different cabooses for both slots of both the SP and
     // RoT, just to exercise that we correctly keep track of different
     // cabooses.
-    let _ = builder
+    builder
         .found_caboose(
             &psc_bb,
             CabooseWhich::SpSlot0,
@@ -182,7 +184,7 @@
             caboose("psc_sp_0"),
         )
         .unwrap();
-    let _ = builder
+    builder
         .found_caboose(
             &psc_bb,
             CabooseWhich::SpSlot1,
@@ -190,7 +192,7 @@
             caboose("psc_sp_1"),
         )
         .unwrap();
-    let _ = builder
+    builder
         .found_caboose(
             &psc_bb,
             CabooseWhich::RotSlotA,
@@ -198,7 +200,7 @@
             caboose("psc_rot_a"),
         )
         .unwrap();
-    let _ = builder
+    builder
         .found_caboose(
             &psc_bb,
             CabooseWhich::RotSlotB,
@@ -209,6 +211,59 @@
     // We deliberately provide no cabooses for sled3.
 
+    // Report some RoT pages.
+
+    // We'll use the same RoT pages for most of these components, although
+    // that's not possible in a real system. We deliberately construct a new
+    // value each time to make sure the builder correctly normalizes it.
+    let common_rot_page_baseboards = [&sled1_bb, &sled3_bb, &switch1_bb];
+    for bb in common_rot_page_baseboards {
+        for which in RotPageWhich::iter() {
+            assert!(!builder.found_rot_page_already(bb, which));
+            builder
+                .found_rot_page(bb, which, "test suite", rot_page("1"))
+                .unwrap();
+            assert!(builder.found_rot_page_already(bb, which));
+        }
+    }
+
+    // For the PSC, use different RoT page data for each kind of page, just to
+    // exercise that we correctly keep track of different data values.
+    builder
+        .found_rot_page(
+            &psc_bb,
+            RotPageWhich::Cmpa,
+            "test suite",
+            rot_page("psc cmpa"),
+        )
+        .unwrap();
+    builder
+        .found_rot_page(
+            &psc_bb,
+            RotPageWhich::CfpaActive,
+            "test suite",
+            rot_page("psc cfpa active"),
+        )
+        .unwrap();
+    builder
+        .found_rot_page(
+            &psc_bb,
+            RotPageWhich::CfpaInactive,
+            "test suite",
+            rot_page("psc cfpa inactive"),
+        )
+        .unwrap();
+    builder
+        .found_rot_page(
+            &psc_bb,
+            RotPageWhich::CfpaScratch,
+            "test suite",
+            rot_page("psc cfpa scratch"),
+        )
+        .unwrap();
+
+    // We deliberately provide no RoT pages for sled2.
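The normalization these examples exercise can be restated compactly. A hedged
sketch (hypothetical test code, not part of this patch; it assumes a fresh
`CollectionBuilder` where `sled1_bb` and `sled3_bb` have already been reported
as baseboards):

```rust
// Two separately constructed RotPage values with equal contents should be
// deduplicated into a single shared entry by the builder.
builder
    .found_rot_page(&sled1_bb, RotPageWhich::Cmpa, "test suite", rot_page("1"))
    .unwrap();
builder
    .found_rot_page(&sled3_bb, RotPageWhich::Cmpa, "test suite", rot_page("1"))
    .unwrap();
let collection = builder.build();
// Exactly one normalized page, referenced by both observations.
assert_eq!(collection.rot_pages.len(), 1);
```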
+ Representative { builder, sleds: [sled1_bb, sled2_bb, sled3_bb], @@ -252,3 +307,10 @@ pub fn caboose(unique: &str) -> SpComponentCaboose { version: format!("version_{}", unique), } } + +pub fn rot_page(unique: &str) -> RotPage { + use base64::Engine; + RotPage { + data_base64: base64::engine::general_purpose::STANDARD.encode(unique), + } +} diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index 76b929bfba..b9894ff184 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -10,6 +10,16 @@ cabooses: board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" +rot pages: + data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 
"Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + SPs: baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" 
baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" @@ -40,4 +50,22 @@ cabooses found: RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" +rot pages found: + Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + errors: diff --git a/nexus/inventory/tests/output/collector_errors.txt b/nexus/inventory/tests/output/collector_errors.txt index c61d2e7c29..a50e24ca30 100644 --- a/nexus/inventory/tests/output/collector_errors.txt +++ b/nexus/inventory/tests/output/collector_errors.txt @@ -10,6 +10,16 @@ cabooses: board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" +rot pages: + data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 
"c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + SPs: baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" @@ -40,5 +50,23 @@ cabooses found: RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" +rot pages found: + Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + errors: error: MGS "http://[100::1]:12345": listing ignition targets: Communication Error <> diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 112eec3a65..19c323d894 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -56,6 +56,11 @@ pub struct Collection { /// /// In practice, these will be inserted into the `sw_caboose` table. pub cabooses: BTreeSet>, + /// unique root of trust page contents that were found in this collection + /// + /// In practice, these will be inserted into the `sw_root_of_trust_page` + /// table. + pub rot_pages: BTreeSet>, /// all service processors, keyed by baseboard id /// @@ -73,6 +78,14 @@ pub struct Collection { /// In practice, these will be inserted into the `inv_caboose` table. pub cabooses_found: BTreeMap, CabooseFound>>, + /// all root of trust page contents found, keyed first by the kind of page + /// (`RotPageWhich`), then the baseboard id of the sled where they were + /// found + /// + /// In practice, these will be inserted into the `inv_root_of_trust_page` + /// table. 
+    pub rot_pages_found:
+        BTreeMap<RotPageWhich, BTreeMap<Arc<BaseboardId>, RotPageFound>>,
 }
 
 impl Collection {
@@ -85,6 +98,16 @@ impl Collection {
             .get(&which)
             .and_then(|by_bb| by_bb.get(baseboard_id))
     }
+
+    pub fn rot_page_for(
+        &self,
+        which: RotPageWhich,
+        baseboard_id: &BaseboardId,
+    ) -> Option<&RotPageFound> {
+        self.rot_pages_found
+            .get(&which)
+            .and_then(|by_bb| by_bb.get(baseboard_id))
+    }
 }
 
 /// A unique baseboard id found during a collection
@@ -177,3 +200,57 @@ pub enum CabooseWhich {
     RotSlotA,
     RotSlotB,
 }
+
+/// Root of trust page contents found during a collection
+///
+/// These are normalized in the database. Each distinct `RotPage` is assigned a
+/// uuid and shared across many possible collections that reference it.
+#[derive(Clone, Debug, Ord, Eq, PartialOrd, PartialEq)]
+pub struct RotPage {
+    pub data_base64: String,
+}
+
+/// Indicates that a particular `RotPage` was found (at a particular time from a
+/// particular source, but these are only for debugging)
+#[derive(Clone, Debug, Ord, Eq, PartialOrd, PartialEq)]
+pub struct RotPageFound {
+    pub time_collected: DateTime<Utc>,
+    pub source: String,
+    pub page: Arc<RotPage>,
+}
+
+/// Describes which root of trust page this is
+#[derive(Clone, Copy, Debug, EnumIter, PartialEq, Eq, PartialOrd, Ord)]
+pub enum RotPageWhich {
+    Cmpa,
+    CfpaActive,
+    CfpaInactive,
+    CfpaScratch,
+}
+
+/// Trait to convert between the two MGS root of trust page types and a tuple of
+/// `([RotPageWhich], [RotPage])`.
+///
+/// This cannot use the standard `From` trait due to orphan rules: we do not own
+/// the `gateway_client` type, and tuples are always considered foreign.
+pub trait IntoRotPage {
+    fn into_rot_page(self) -> (RotPageWhich, RotPage);
+}
+
+impl IntoRotPage for gateway_client::types::RotCmpa {
+    fn into_rot_page(self) -> (RotPageWhich, RotPage) {
+        (RotPageWhich::Cmpa, RotPage { data_base64: self.base64_data })
+    }
+}
+
+impl IntoRotPage for gateway_client::types::RotCfpa {
+    fn into_rot_page(self) -> (RotPageWhich, RotPage) {
+        use gateway_client::types::RotCfpaSlot;
+        let which = match self.slot {
+            RotCfpaSlot::Active => RotPageWhich::CfpaActive,
+            RotCfpaSlot::Inactive => RotPageWhich::CfpaInactive,
+            RotCfpaSlot::Scratch => RotPageWhich::CfpaScratch,
+        };
+        (which, RotPage { data_base64: self.base64_data })
+    }
+}
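Taken together, `IntoRotPage` and `Collection::rot_page_for` form the round
trip between an MGS response and a recorded inventory observation. A sketch of
how they compose (illustrative only; the function `cfpa_recorded` and its
arguments are assumed for the example and are not part of this patch):

```rust
use nexus_types::inventory::{BaseboardId, Collection, IntoRotPage};

// Normalize an MGS CFPA response and check whether the same page contents
// were recorded for the given baseboard in this collection.
fn cfpa_recorded(
    collection: &Collection,
    baseboard: &BaseboardId,
    cfpa: gateway_client::types::RotCfpa,
) -> bool {
    let (which, page) = cfpa.into_rot_page();
    collection
        .rot_page_for(which, baseboard)
        .map(|found| found.page.data_base64 == page.data_base64)
        .unwrap_or(false)
}
```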
diff --git a/schema/crdb/13.0.0/up1.sql b/schema/crdb/13.0.0/up1.sql
new file mode 100644
index 0000000000..c6ca3bcb13
--- /dev/null
+++ b/schema/crdb/13.0.0/up1.sql
@@ -0,0 +1,4 @@
+CREATE TABLE IF NOT EXISTS omicron.public.sw_root_of_trust_page (
+    id UUID PRIMARY KEY,
+    data_base64 TEXT NOT NULL
+);
diff --git a/schema/crdb/13.0.0/up2.sql b/schema/crdb/13.0.0/up2.sql
new file mode 100644
index 0000000000..5d8e775038
--- /dev/null
+++ b/schema/crdb/13.0.0/up2.sql
@@ -0,0 +1,2 @@
+CREATE UNIQUE INDEX IF NOT EXISTS root_of_trust_page_properties
+    on omicron.public.sw_root_of_trust_page (data_base64);
diff --git a/schema/crdb/13.0.0/up3.sql b/schema/crdb/13.0.0/up3.sql
new file mode 100644
index 0000000000..9fb407e7b9
--- /dev/null
+++ b/schema/crdb/13.0.0/up3.sql
@@ -0,0 +1,6 @@
+CREATE TYPE IF NOT EXISTS omicron.public.root_of_trust_page_which AS ENUM (
+    'cmpa',
+    'cfpa_active',
+    'cfpa_inactive',
+    'cfpa_scratch'
+);
diff --git a/schema/crdb/13.0.0/up4.sql b/schema/crdb/13.0.0/up4.sql
new file mode 100644
index 0000000000..9d227c7427
--- /dev/null
+++ b/schema/crdb/13.0.0/up4.sql
@@ -0,0 +1,17 @@
+CREATE TABLE IF NOT EXISTS omicron.public.inv_root_of_trust_page (
+    -- where this observation came from
+    -- (foreign key into `inv_collection` table)
+    inv_collection_id UUID NOT NULL,
+    -- which system this SP reports it is part of
+    -- (foreign key into `hw_baseboard_id` table)
+    hw_baseboard_id UUID NOT NULL,
+    -- when this observation was made
+    time_collected TIMESTAMPTZ NOT NULL,
+    -- which MGS instance reported this data
+    source TEXT NOT NULL,
+
+    which omicron.public.root_of_trust_page_which NOT NULL,
+    sw_root_of_trust_page_id UUID NOT NULL,
+
+    PRIMARY KEY (inv_collection_id, hw_baseboard_id, which)
+);
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 7bd83439e8..fc3bc37fd7 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -2627,13 +2627,20 @@ CREATE TABLE IF NOT EXISTS omicron.public.sw_caboose (
     board TEXT NOT NULL,
     git_commit TEXT NOT NULL,
     name TEXT NOT NULL,
-    -- The MGS response that provides this field indicates that it can be NULL.
-    -- But that's only to support old software that we no longer support.
     version TEXT NOT NULL
 );
 
 CREATE UNIQUE INDEX IF NOT EXISTS caboose_properties
     on omicron.public.sw_caboose (board, git_commit, name, version);
 
+/* root of trust pages: this table assigns unique ids to distinct RoT CMPA
+   and CFPA page contents, each of which is a 512-byte blob */
+CREATE TABLE IF NOT EXISTS omicron.public.sw_root_of_trust_page (
+    id UUID PRIMARY KEY,
+    data_base64 TEXT NOT NULL
+);
+CREATE UNIQUE INDEX IF NOT EXISTS root_of_trust_page_properties
+    on omicron.public.sw_root_of_trust_page (data_base64);
+
 /* Inventory Collections */
 
 -- list of all collections
@@ -2741,6 +2748,32 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_caboose (
     PRIMARY KEY (inv_collection_id, hw_baseboard_id, which)
 );
 
+CREATE TYPE IF NOT EXISTS omicron.public.root_of_trust_page_which AS ENUM (
+    'cmpa',
+    'cfpa_active',
+    'cfpa_inactive',
+    'cfpa_scratch'
+);
+
+-- root of trust key signing pages found
+CREATE TABLE IF NOT EXISTS omicron.public.inv_root_of_trust_page (
+    -- where this observation came from
+    -- (foreign key into `inv_collection` table)
+    inv_collection_id UUID NOT NULL,
+    -- which system this SP reports it is part of
+    -- (foreign key into `hw_baseboard_id` table)
+    hw_baseboard_id UUID NOT NULL,
+    -- when this observation was made
+    time_collected TIMESTAMPTZ NOT NULL,
+    -- which MGS instance reported this data
+    source TEXT NOT NULL,
+
+    which omicron.public.root_of_trust_page_which NOT NULL,
+    sw_root_of_trust_page_id UUID NOT NULL,
+
+    PRIMARY KEY (inv_collection_id, hw_baseboard_id, which)
+);
+
 /*******************************************************************/
 
 /*
@@ -2919,7 +2952,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    ( TRUE, NOW(), NOW(), '12.0.0', NULL)
+    ( TRUE, NOW(), NOW(), '13.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs
index 0c753b62b5..635e8fde6b 100644
--- a/sp-sim/src/gimlet.rs
+++ b/sp-sim/src/gimlet.rs
@@ -22,8 +22,11 @@ use futures::Future;
 use gateway_messages::ignition::{self, LinkEvents};
 use gateway_messages::sp_impl::SpHandler;
 use gateway_messages::sp_impl::{BoundsChecked, DeviceDescription};
+use gateway_messages::CfpaPage;
 use gateway_messages::ComponentAction;
 use gateway_messages::Header;
+use gateway_messages::RotRequest;
+use gateway_messages::RotResponse;
 use gateway_messages::RotSlotId;
 use gateway_messages::SpComponent;
 use gateway_messages::SpError;
@@ -1371,10 +1374,18 @@ impl SpHandler for Handler {
 
     fn read_rot(
         &mut self,
-        _request: gateway_messages::RotRequest,
-        _buf: &mut [u8],
-    ) -> std::result::Result<gateway_messages::RotResponse, SpError> {
-        Err(SpError::RequestUnsupportedForSp)
+        request: RotRequest,
+        buf: &mut [u8],
+    ) -> std::result::Result<RotResponse, SpError> {
+        let dummy_page = match request {
+            RotRequest::ReadCmpa => "gimlet-cmpa",
+            RotRequest::ReadCfpa(CfpaPage::Active) => "gimlet-cfpa-active",
+            RotRequest::ReadCfpa(CfpaPage::Inactive) => "gimlet-cfpa-inactive",
+            RotRequest::ReadCfpa(CfpaPage::Scratch) => "gimlet-cfpa-scratch",
+        };
+        buf[..dummy_page.len()].copy_from_slice(dummy_page.as_bytes());
+        buf[dummy_page.len()..].fill(0);
+        Ok(RotResponse::Ok)
     }
 }
diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs
index 46fe8b5df7..19e84ffc64 100644
--- a/sp-sim/src/sidecar.rs
+++ b/sp-sim/src/sidecar.rs
@@ -27,6 +27,7 @@ use gateway_messages::ignition::LinkEvents;
 use gateway_messages::sp_impl::BoundsChecked;
 use gateway_messages::sp_impl::DeviceDescription;
 use gateway_messages::sp_impl::SpHandler;
+use gateway_messages::CfpaPage;
 use gateway_messages::ComponentAction;
 use gateway_messages::ComponentDetails;
 use gateway_messages::DiscoverResponse;
@@ -34,6 +35,8 @@ use gateway_messages::IgnitionCommand;
 use gateway_messages::IgnitionState;
 use gateway_messages::MgsError;
 use gateway_messages::PowerState;
+use gateway_messages::RotRequest;
+use gateway_messages::RotResponse;
 use gateway_messages::RotSlotId;
 use gateway_messages::SpComponent;
 use gateway_messages::SpError;
@@ -1150,10 +1153,18 @@ impl SpHandler for Handler {
 
     fn read_rot(
         &mut self,
-        _request: gateway_messages::RotRequest,
-        _buf: &mut [u8],
-    ) -> std::result::Result<gateway_messages::RotResponse, SpError> {
-        Err(SpError::RequestUnsupportedForSp)
+        request: RotRequest,
+        buf: &mut [u8],
+    ) -> std::result::Result<RotResponse, SpError> {
+        let dummy_page = match request {
+            RotRequest::ReadCmpa => "sidecar-cmpa",
+            RotRequest::ReadCfpa(CfpaPage::Active) => "sidecar-cfpa-active",
+            RotRequest::ReadCfpa(CfpaPage::Inactive) => "sidecar-cfpa-inactive",
+            RotRequest::ReadCfpa(CfpaPage::Scratch) => "sidecar-cfpa-scratch",
+        };
+        buf[..dummy_page.len()].copy_from_slice(dummy_page.as_bytes());
+        buf[dummy_page.len()..].fill(0);
+        Ok(RotResponse::Ok)
     }
 }

From b07a8f593325efe97ddb526c2725d45d480bf7e6 Mon Sep 17 00:00:00 2001
From: Rain
Date: Tue, 21 Nov 2023 14:59:21 -0800
Subject: [PATCH 012/186] [meta] a few changes to prevent duplicate dep builds (#4535)

This PR has a few changes that make builds and test runs significantly
faster:

1. Remove `xtask` from the list of default-members. This makes it so that
   `cargo nextest run` and `cargo nextest run -p <crate>` share more
   dependency feature sets.
2. Move `opt-level` settings from `profile.test` to `profile.dev`. Again,
   this results in more cache hits.
3. Set `profile.dev.panic` to `unwind`. This is to unify build units
   across dev and test builds: tests are always built with
   `panic = "unwind"` so that proper backtraces can be printed out.
   Release builds stay as `abort`.
4. For a belt-and-suspenders approach, make the `crdb-seed` script use the
   `test` profile. If there are any divergences between `dev` and `test`
   in the future, then crdb-seed should share its build cache with the
   tests it was presumably invoked for.
5. Set `profile.dev.build-override.debug` to `line-tables-only`. This,
   along with 3, means that target (normal/dev) and build (host)
   dependencies are now unified.

All of this comes together for a pretty sweet improvement. See #4392 for
more details and how I investigated this issue.
## Impact

With a fresh build on Linux with mold, I ran three commands in sequence:

1. `cargo nextest run --no-run`
2. `cargo nextest run -p nexus-db-queries`
3. `cargo build -p omicron-nexus`

The results were:

| **command**                              | **phase**         | **before** | **before, cumul.** | **after** | **after, cumul.** |
|------------------------------------------|-------------------|-----------:|-------------------:|----------:|------------------:|
| `cargo nextest run`                      | build             |       173s |               173s |      158s |              158s |
| `cargo nextest run -p nexus-db-queries`  | build             |        61s |               234s |       51s |              209s |
| `cargo nextest run -p nexus-db-queries`  | `crdb-seed` build |        21s |               255s |        1s |              210s |
| `cargo build -p omicron-nexus`           | build             |        99s |               354s |       69s |              279s |

So the cumulative time spent on these three commands went from 354s to
279s. That's a 1.26x speedup. And this should also make other commands
better as well (omicron-nexus is a bit of a weird case because it takes a
very long time to compile by itself, and that 69s in the "after" column is
entirely building omicron-nexus).
---
 .config/nextest.toml |   4 +-
 Cargo.toml           | 122 ++++++++++++++++++++++++------------
 2 files changed, 70 insertions(+), 56 deletions(-)

diff --git a/.config/nextest.toml b/.config/nextest.toml
index 79774e3658..ef296d7ef8 100644
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -17,7 +17,9 @@ setup = 'crdb-seed'
 fail-fast = false
 
 [script.crdb-seed]
-command = 'cargo run -p crdb-seed'
+# Use the test profile for this executable since that's how almost all
+# invocations of nextest happen.
+command = 'cargo run -p crdb-seed --profile test'
 
 # The ClickHouse cluster tests currently rely on a hard-coded set of ports for
 # the nodes in the cluster. We would like to relax this in the future, at which
diff --git a/Cargo.toml b/Cargo.toml
index fb220ba53d..f3da0381df 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -91,7 +91,9 @@ default-members = [
     "dev-tools/omdb",
     "dev-tools/omicron-dev",
     "dev-tools/thing-flinger",
-    "dev-tools/xtask",
+    # Do not include xtask in the list of default members, because this causes
+    # hakari to not work as well and build times to be longer.
+    # See omicron#4392.
     "dns-server",
     "gateway-cli",
     "gateway-test-utils",
@@ -391,13 +393,27 @@ zeroize = { version = "1.6.0", features = ["zeroize_derive", "std"] }
 zip = { version = "0.6.6", default-features = false, features = ["deflate","bzip2"] }
 zone = { version = "0.3", default-features = false, features = ["async"] }
 
+# NOTE: The test profile inherits from the dev profile, so settings under
+# profile.dev get inherited. AVOID setting anything under profile.test: that
+# will cause dev and test builds to diverge, which will cause more Cargo build
+# cache misses.
+
 [profile.dev]
-panic = "abort"
+# Note: This used to be panic = "abort" earlier, but that caused a lot of
+# duplicate dependency builds. Letting panic be "unwind" causes dependencies
+# across `cargo test` and `cargo run` to be unified. See omicron#4392.
+panic = "unwind"
+
 # See https://github.com/oxidecomputer/omicron/issues/4009 for some background context here.
 # By reducing the debug level (though keeping enough to have meaningful
 # backtraces), we reduce incremental build time and binary size significantly.
 debug = "line-tables-only"
 
+[profile.dev.build-override]
+# Setting this to line-tables-only results in a large improvement in build
+# times, because it allows target and host dependencies to be unified.
+debug = "line-tables-only"
+
 # `bindgen` is used by `samael`'s build script; building it with optimizations
 # makes that build script run ~5x faster, more than offsetting the additional
 # build time added to `bindgen` itself.
@@ -428,112 +444,108 @@ panic = "abort" # proptest based test generation and shrinking is expensive. Let's optimize it. [profile.dev.package.proptest] opt-level = 3 -[profile.test.package.proptest] -opt-level = 3 [profile.dev.package.bootstore] opt-level = 3 -[profile.test.package.bootstore] -opt-level = 3 # Crypto stuff always needs optimizations -[profile.test.package.sha3] +[profile.dev.package.sha3] opt-level = 3 -[profile.test.package.sha2] +[profile.dev.package.sha2] opt-level = 3 -[profile.test.package.hkdf] +[profile.dev.package.hkdf] opt-level = 3 -[profile.test.package.chacha20poly1305] +[profile.dev.package.chacha20poly1305] opt-level = 3 -[profile.test.package.chacha20] +[profile.dev.package.chacha20] opt-level = 3 -[profile.test.package.vsss-rs] +[profile.dev.package.vsss-rs] opt-level = 3 -[profile.test.package.curve25519-dalek] +[profile.dev.package.curve25519-dalek] opt-level = 3 -[profile.test.package.aead] +[profile.dev.package.aead] opt-level = 3 -[profile.test.package.aes] +[profile.dev.package.aes] opt-level = 3 -[profile.test.package.aes-gcm] +[profile.dev.package.aes-gcm] opt-level = 3 -[profile.test.package.bcrypt-pbkdf] +[profile.dev.package.bcrypt-pbkdf] opt-level = 3 -[profile.test.package.blake2] +[profile.dev.package.blake2] opt-level = 3 -[profile.test.package.blake2b_simd] +[profile.dev.package.blake2b_simd] opt-level = 3 -[profile.test.package.block-buffer] +[profile.dev.package.block-buffer] opt-level = 3 -[profile.test.package.block-padding] +[profile.dev.package.block-padding] opt-level = 3 -[profile.test.package.blowfish] +[profile.dev.package.blowfish] opt-level = 3 -[profile.test.package.constant_time_eq] +[profile.dev.package.constant_time_eq] opt-level = 3 -[profile.test.package.crypto-bigint] +[profile.dev.package.crypto-bigint] opt-level = 3 -[profile.test.package.crypto-common] +[profile.dev.package.crypto-common] opt-level = 3 -[profile.test.package.ctr] +[profile.dev.package.ctr] opt-level = 3 -[profile.test.package.cbc] +[profile.dev.package.cbc] opt-level = 3 -[profile.test.package.digest] +[profile.dev.package.digest] opt-level = 3 -[profile.test.package.ed25519] +[profile.dev.package.ed25519] opt-level = 3 -[profile.test.package.ed25519-dalek] +[profile.dev.package.ed25519-dalek] opt-level = 3 -[profile.test.package.elliptic-curve] +[profile.dev.package.elliptic-curve] opt-level = 3 -[profile.test.package.generic-array] +[profile.dev.package.generic-array] opt-level = 3 -[profile.test.package.getrandom] +[profile.dev.package.getrandom] opt-level = 3 -[profile.test.package.hmac] +[profile.dev.package.hmac] opt-level = 3 -[profile.test.package.lpc55_sign] +[profile.dev.package.lpc55_sign] opt-level = 3 -[profile.test.package.md5] +[profile.dev.package.md5] opt-level = 3 -[profile.test.package.md-5] +[profile.dev.package.md-5] opt-level = 3 -[profile.test.package.num-bigint] +[profile.dev.package.num-bigint] opt-level = 3 -[profile.test.package.num-bigint-dig] +[profile.dev.package.num-bigint-dig] opt-level = 3 -[profile.test.package.rand] +[profile.dev.package.rand] opt-level = 3 -[profile.test.package.rand_chacha] +[profile.dev.package.rand_chacha] opt-level = 3 -[profile.test.package.rand_core] +[profile.dev.package.rand_core] opt-level = 3 -[profile.test.package.rand_hc] +[profile.dev.package.rand_hc] opt-level = 3 -[profile.test.package.rand_xorshift] +[profile.dev.package.rand_xorshift] opt-level = 3 -[profile.test.package.rsa] +[profile.dev.package.rsa] opt-level = 3 -[profile.test.package.salty] +[profile.dev.package.salty] opt-level = 
3 -[profile.test.package.signature] +[profile.dev.package.signature] opt-level = 3 -[profile.test.package.subtle] +[profile.dev.package.subtle] opt-level = 3 -[profile.test.package.tiny-keccak] +[profile.dev.package.tiny-keccak] opt-level = 3 -[profile.test.package.uuid] +[profile.dev.package.uuid] opt-level = 3 -[profile.test.package.cipher] +[profile.dev.package.cipher] opt-level = 3 -[profile.test.package.cpufeatures] +[profile.dev.package.cpufeatures] opt-level = 3 -[profile.test.package.poly1305] +[profile.dev.package.poly1305] opt-level = 3 -[profile.test.package.inout] +[profile.dev.package.inout] opt-level = 3 -[profile.test.package.keccak] +[profile.dev.package.keccak] opt-level = 3 # From c339fc7681bf1337325f5b2fb233f2d314d579ed Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 22 Nov 2023 02:00:31 +0000 Subject: [PATCH 013/186] Update Rust crate tokio-tungstenite to 0.20 (#4403) Co-authored-by: Rain --- Cargo.lock | 41 +++---------------- Cargo.toml | 2 +- gateway/src/serial_console.rs | 8 +++- .../tests/integration_tests/serial_console.rs | 3 +- 4 files changed, 14 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c9c31a2ac..07f804b03d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2443,7 +2443,7 @@ dependencies = [ "slog-term", "termios", "tokio", - "tokio-tungstenite 0.18.0", + "tokio-tungstenite", "uuid", ] @@ -4575,7 +4575,7 @@ dependencies = [ "thiserror", "tokio", "tokio-stream", - "tokio-tungstenite 0.18.0", + "tokio-tungstenite", "toml 0.8.8", "uuid", ] @@ -6122,7 +6122,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "tokio-tungstenite 0.20.1", + "tokio-tungstenite", "uuid", ] @@ -6152,7 +6152,7 @@ dependencies = [ "slog-term", "thiserror", "tokio", - "tokio-tungstenite 0.20.1", + "tokio-tungstenite", "uuid", ] @@ -8522,18 +8522,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-tungstenite" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54319c93411147bced34cb5609a80e0a8e44c5999c93903a81cd866630ec0bfd" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite 0.18.0", -] - [[package]] name = "tokio-tungstenite" version = "0.20.1" @@ -8543,7 +8531,7 @@ dependencies = [ "futures-util", "log", "tokio", - "tungstenite 0.20.1", + "tungstenite", ] [[package]] @@ -8901,25 +8889,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "tungstenite" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ee6ab729cd4cf0fd55218530c4522ed30b7b6081752839b68fcec8d0960788" -dependencies = [ - "base64 0.13.1", - "byteorder", - "bytes", - "http", - "httparse", - "log", - "rand 0.8.5", - "sha1", - "thiserror", - "url", - "utf-8", -] - [[package]] name = "tungstenite" version = "0.20.1" diff --git a/Cargo.toml b/Cargo.toml index f3da0381df..e4588efbde 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -368,7 +368,7 @@ tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } tokio = "1.34.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" -tokio-tungstenite = "0.18" +tokio-tungstenite = "0.20" tokio-util = { version = "0.7.10", features = ["io", "io-util"] } toml = "0.8.8" toml_edit = "0.21.0" diff --git a/gateway/src/serial_console.rs b/gateway/src/serial_console.rs index eb6183fdfb..3e49f8526a 100644 --- a/gateway/src/serial_console.rs +++ b/gateway/src/serial_console.rs @@ -48,8 +48,12 @@ 
pub(crate) async fn run(
     log: Logger,
 ) -> WebsocketChannelResult {
     let upgraded = conn.into_inner();
-    let config =
-        WebSocketConfig { max_send_queue: Some(4096), ..Default::default() };
+    let config = WebSocketConfig {
+        // Maintain a max write buffer size of 2 MB (this is only relevant if
+        // writes are failing).
+        max_write_buffer_size: 2 * 1024 * 1024,
+        ..Default::default()
+    };
     let ws_stream =
         WebSocketStream::from_raw_socket(upgraded, Role::Server, Some(config))
             .await;
diff --git a/gateway/tests/integration_tests/serial_console.rs b/gateway/tests/integration_tests/serial_console.rs
index 9ab26bef4a..11cb9674a7 100644
--- a/gateway/tests/integration_tests/serial_console.rs
+++ b/gateway/tests/integration_tests/serial_console.rs
@@ -100,11 +100,12 @@ async fn serial_console_detach() {
             }
             tungstenite::Error::ConnectionClosed
             | tungstenite::Error::AlreadyClosed
+            | tungstenite::Error::AttackAttempt
             | tungstenite::Error::Io(_)
             | tungstenite::Error::Tls(_)
             | tungstenite::Error::Capacity(_)
             | tungstenite::Error::Protocol(_)
-            | tungstenite::Error::SendQueueFull(_)
+            | tungstenite::Error::WriteBufferFull(_)
             | tungstenite::Error::Utf8
             | tungstenite::Error::Url(_)
             | tungstenite::Error::HttpFormat(_) => panic!("unexpected error"),

From 36f6abe06548eae0f954f1b968f98def846f7d93 Mon Sep 17 00:00:00 2001
From: Ryan Goodfellow
Date: Wed, 22 Nov 2023 10:04:29 -0800
Subject: [PATCH 014/186] factor out a common port settings saga function
 missed in #4528 (#4549)

---
 .../app/sagas/switch_port_settings_apply.rs  | 60 ++++---------------
 .../app/sagas/switch_port_settings_clear.rs  | 15 +++--
 .../app/sagas/switch_port_settings_common.rs | 37 ++++++++++++
 3 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs
index aba62b6937..0d6bb52421 100644
--- a/nexus/src/app/sagas/switch_port_settings_apply.rs
+++ b/nexus/src/app/sagas/switch_port_settings_apply.rs
@@ -6,8 +6,8 @@ use super::{NexusActionContext, NEXUS_DPD_TAG};
 use crate::app::sagas::retry_until_known_result;
 use crate::app::sagas::switch_port_settings_common::{
     api_to_dpd_port_settings, ensure_switch_port_bgp_settings,
-    ensure_switch_port_uplink, select_mg_client, switch_sled_agent,
-    write_bootstore_config,
+    ensure_switch_port_uplink, select_dendrite_client, select_mg_client,
+    switch_sled_agent, write_bootstore_config,
 };
 use crate::app::sagas::{
     declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError,
@@ -19,9 +19,7 @@ use nexus_db_model::NETWORK_KEY;
 use nexus_db_queries::db::datastore::UpdatePrecondition;
 use nexus_db_queries::{authn, db};
 use omicron_common::api::external::{self, NameOrId};
-use omicron_common::api::internal::shared::{
-    ParseSwitchLocationError, SwitchLocation,
-};
+use omicron_common::api::internal::shared::SwitchLocation;
 use serde::{Deserialize, Serialize};
 use std::net::IpAddr;
 use std::str::FromStr;
@@ -160,6 +158,10 @@ async fn spa_ensure_switch_port_settings(
 ) -> Result<(), ActionError> {
     let params = sagactx.saga_params::<Params>()?;
     let log = sagactx.user_data().log();
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
 
     let settings = sagactx
         .lookup::<SwitchPortSettingsCombinedResult>("switch_port_settings")?;
@@ -170,7 +172,7 @@ async fn spa_ensure_switch_port_settings(
         })?;
 
     let dpd_client: Arc<dpd_client::Client> =
-        select_dendrite_client(&sagactx).await?;
+        select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?;
 
     let dpd_port_settings =
         api_to_dpd_port_settings(&settings).map_err(|e| {
@@ -227,8 +229,8 @@ async fn spa_undo_ensure_switch_port_settings(
         .lookup::<Option<Uuid>>("original_switch_port_settings_id")
         .map_err(|e| external::Error::internal_error(&e.to_string()))?;
 
-    let dpd_client: Arc<dpd_client::Client> =
-        select_dendrite_client(&sagactx).await?;
+    let dpd_client =
+        select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?;
 
     let id = match orig_port_settings_id {
         Some(id) => id,
@@ -471,48 +473,6 @@ async fn spa_disassociate_switch_port(
     Ok(())
 }
 
-pub(crate) async fn select_dendrite_client(
-    sagactx: &NexusActionContext,
-) -> Result<Arc<dpd_client::Client>, ActionError> {
-    let osagactx = sagactx.user_data();
-    let params = sagactx.saga_params::<Params>()?;
-    let nexus = osagactx.nexus();
-    let opctx = crate::context::op_context_for_saga_action(
-        &sagactx,
-        &params.serialized_authn,
-    );
-
-    let switch_port = nexus
-        .get_switch_port(&opctx, params.switch_port_id)
-        .await
-        .map_err(|e| {
-            ActionError::action_failed(format!(
-                "get switch port for dendrite client selection {e}"
-            ))
-        })?;
-
-    let switch_location: SwitchLocation =
-        switch_port.switch_location.parse().map_err(
-            |e: ParseSwitchLocationError| {
-                ActionError::action_failed(format!(
-                    "get switch location for uplink: {e:?}",
-                ))
-            },
-        )?;
-
-    let dpd_client: Arc<dpd_client::Client> = osagactx
-        .nexus()
-        .dpd_clients
-        .get(&switch_location)
-        .ok_or_else(|| {
-            ActionError::action_failed(format!(
-                "requested switch not available: {switch_location}"
-            ))
-        })?
-        .clone();
-    Ok(dpd_client)
-}
-
 async fn spa_ensure_switch_port_bgp_settings(
     sagactx: NexusActionContext,
 ) -> Result<(), ActionError> {
diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs
index bcbd5bf894..0d876f8159 100644
--- a/nexus/src/app/sagas/switch_port_settings_clear.rs
+++ b/nexus/src/app/sagas/switch_port_settings_clear.rs
@@ -2,14 +2,13 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
-use super::switch_port_settings_apply::select_dendrite_client;
 use super::{NexusActionContext, NEXUS_DPD_TAG};
 use crate::app::sagas::retry_until_known_result;
 use crate::app::sagas::switch_port_settings_common::{
     api_to_dpd_port_settings, apply_bootstore_update, bootstore_update,
     ensure_switch_port_bgp_settings, ensure_switch_port_uplink,
-    read_bootstore_config, select_mg_client, switch_sled_agent,
-    write_bootstore_config,
+    read_bootstore_config, select_dendrite_client, select_mg_client,
+    switch_sled_agent, write_bootstore_config,
 };
 use crate::app::sagas::{
     declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError,
@@ -147,11 +146,16 @@ async fn spa_clear_switch_port_settings(
 ) -> Result<(), ActionError> {
     let params = sagactx.saga_params::<Params>()?;
     let log = sagactx.user_data().log();
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
 
     let port_id: PortId = PortId::from_str(&params.port_name)
         .map_err(|e| ActionError::action_failed(e.to_string()))?;
 
-    let dpd_client = select_dendrite_client(&sagactx).await?;
+    let dpd_client =
+        select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?;
 
     retry_until_known_result(log, || async {
         dpd_client.port_settings_clear(&port_id, Some(NEXUS_DPD_TAG)).await
@@ -191,7 +195,8 @@ async fn spa_undo_clear_switch_port_settings(
         .await
         .map_err(ActionError::action_failed)?;
 
-    let dpd_client = select_dendrite_client(&sagactx).await?;
+    let dpd_client =
+        select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?;
 
     let dpd_port_settings = api_to_dpd_port_settings(&settings)
         .map_err(ActionError::action_failed)?;
diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs
index 8e66aa12f8..b328c6d1ac 100644
--- a/nexus/src/app/sagas/switch_port_settings_common.rs
+++ b/nexus/src/app/sagas/switch_port_settings_common.rs
@@ -575,3 +575,40 @@ pub struct EarlyNetworkPortUpdate {
     port: PortConfigV1,
     bgp_configs: Vec<BgpConfig>,
 }
+
+pub(crate) async fn select_dendrite_client(
+    sagactx: &NexusActionContext,
+    opctx: &OpContext,
+    switch_port_id: Uuid,
+) -> Result<Arc<dpd_client::Client>, ActionError> {
+    let osagactx = sagactx.user_data();
+    let nexus = osagactx.nexus();
+
+    let switch_port =
+        nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| {
+            ActionError::action_failed(format!(
+                "get switch port for dendrite client selection {e}"
+            ))
+        })?;
+
+    let switch_location: SwitchLocation =
+        switch_port.switch_location.parse().map_err(
+            |e: ParseSwitchLocationError| {
+                ActionError::action_failed(format!(
+                    "get switch location for uplink: {e:?}",
+                ))
+            },
+        )?;
+
+    let dpd_client: Arc<dpd_client::Client> = osagactx
+        .nexus()
+        .dpd_clients
+        .get(&switch_location)
+        .ok_or_else(|| {
+            ActionError::action_failed(format!(
+                "requested switch not available: {switch_location}"
+            ))
+        })?
+        .clone();
+    Ok(dpd_client)
+}

From 3f702ef442a2cb6522684c8b4028bc8a8b11ed6d Mon Sep 17 00:00:00 2001
From: Rain
Date: Wed, 22 Nov 2023 17:26:19 -0800
Subject: [PATCH 015/186] [omicron-dev] increase test timeout to 30 seconds
 (#4557)

On my machine (Ryzen 7950X) I saw that under load (32 tests running at
the same time), the timeout would quite reliably be hit, likely because
cockroach was starved. Increasing it seems pretty harmless.
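
To illustrate what the timeout is guarding: the omicron-dev tests poll for
conditions like "CockroachDB is up" with a wall-clock budget. A minimal
sketch of that pattern (illustrative only -- `wait_until` is a hypothetical
helper, not the actual test code):

```rust
use std::time::{Duration, Instant};

/// Poll `cond` until it returns true or `timeout` elapses.
fn wait_until(timeout: Duration, mut cond: impl FnMut() -> bool) -> bool {
    let deadline = Instant::now() + timeout;
    while Instant::now() < deadline {
        if cond() {
            return true;
        }
        // Under heavy parallel load (e.g. 32 tests on one machine), each
        // iteration can take far longer than on an idle system, so the
        // overall budget needs generous headroom.
        std::thread::sleep(Duration::from_millis(100));
    }
    false
}
```

Raising the budget costs nothing when things are healthy, since the loop
exits as soon as the condition holds.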
--- dev-tools/omicron-dev/tests/test_omicron_dev.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/omicron-dev/tests/test_omicron_dev.rs b/dev-tools/omicron-dev/tests/test_omicron_dev.rs index f1e8177243..7e78e5dc5a 100644 --- a/dev-tools/omicron-dev/tests/test_omicron_dev.rs +++ b/dev-tools/omicron-dev/tests/test_omicron_dev.rs @@ -27,7 +27,7 @@ use subprocess::Redirection; const CMD_OMICRON_DEV: &str = env!("CARGO_BIN_EXE_omicron-dev"); /// timeout used for various things that should be pretty quick -const TIMEOUT: Duration = Duration::from_secs(15); +const TIMEOUT: Duration = Duration::from_secs(30); fn path_to_omicron_dev() -> PathBuf { path_to_executable(CMD_OMICRON_DEV) From 47968b8e17a1a16c1da605da0d418efd8fa6026e Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sat, 25 Nov 2023 01:57:43 -0500 Subject: [PATCH 016/186] [Nexus] Add a sled to an initialized rack (#4545) This commit provides an external API for adding a sled to an already initialized rack. --- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/rack.rs | 19 +- nexus/db-model/src/schema.rs | 12 +- nexus/db-model/src/sled.rs | 4 +- .../src/sled_underlay_subnet_allocation.rs | 16 ++ nexus/db-model/src/switch.rs | 4 +- .../db-queries/src/db/datastore/inventory.rs | 73 ++++- nexus/db-queries/src/db/datastore/rack.rs | 254 ++++++++++++++++++ nexus/src/app/rack.rs | 139 ++++++++-- nexus/src/app/sled.rs | 3 + nexus/src/external_api/http_entrypoints.rs | 31 ++- nexus/tests/integration_tests/endpoints.rs | 17 +- nexus/tests/integration_tests/rack.rs | 66 +++++ nexus/tests/output/nexus_tags.txt | 1 + nexus/types/src/external_api/shared.rs | 36 +++ nexus/types/src/external_api/views.rs | 38 +-- nexus/types/src/internal_api/params.rs | 4 +- nexus/types/src/inventory.rs | 8 + openapi/nexus.json | 30 ++- schema/crdb/14.0.0/up1.sql | 37 +++ schema/crdb/14.0.0/up2.sql | 5 + schema/crdb/dbinit.sql | 47 +++- 22 files changed, 753 insertions(+), 93 deletions(-) create mode 100644 nexus/db-model/src/sled_underlay_subnet_allocation.rs create mode 100644 schema/crdb/14.0.0/up1.sql create mode 100644 schema/crdb/14.0.0/up2.sql diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 6b65eb87ec..ac5bad26f8 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -72,6 +72,7 @@ mod sled; mod sled_instance; mod sled_resource; mod sled_resource_kind; +mod sled_underlay_subnet_allocation; mod snapshot; mod ssh_key; mod switch; @@ -153,6 +154,7 @@ pub use sled::*; pub use sled_instance::*; pub use sled_resource::*; pub use sled_resource_kind::*; +pub use sled_underlay_subnet_allocation::*; pub use snapshot::*; pub use ssh_key::*; pub use switch::*; diff --git a/nexus/db-model/src/rack.rs b/nexus/db-model/src/rack.rs index f2bc7528d2..580ec155b4 100644 --- a/nexus/db-model/src/rack.rs +++ b/nexus/db-model/src/rack.rs @@ -4,9 +4,8 @@ use crate::schema::rack; use db_macros::Asset; -use ipnetwork::{IpNetwork, Ipv6Network}; +use ipnetwork::IpNetwork; use nexus_types::{external_api::views, identity::Asset}; -use omicron_common::api; use uuid::Uuid; /// Information about a local rack. 
@@ -29,22 +28,6 @@ impl Rack {
             rack_subnet: None,
         }
     }
-
-    pub fn subnet(&self) -> Result<Ipv6Network, api::external::Error> {
-        match self.rack_subnet {
-            Some(IpNetwork::V6(subnet)) => Ok(subnet),
-            Some(IpNetwork::V4(_)) => {
-                return Err(api::external::Error::InternalError {
-                    internal_message: "rack subnet not IPv6".into(),
-                })
-            }
-            None => {
-                return Err(api::external::Error::InternalError {
-                    internal_message: "rack subnet not set".into(),
-                })
-            }
-        }
-    }
 }
 
 impl From<Rack> for views::Rack {
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 7f7dd57027..afeac5e6cd 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -755,6 +755,16 @@ table! {
     }
 }
 
+table! {
+    sled_underlay_subnet_allocation (rack_id, sled_id) {
+        rack_id -> Uuid,
+        sled_id -> Uuid,
+        subnet_octet -> Int2,
+        hw_baseboard_id -> Uuid,
+    }
+}
+allow_tables_to_appear_in_same_query!(rack, sled_underlay_subnet_allocation);
+
 table! {
     switch (id) {
         id -> Uuid,
@@ -1289,7 +1299,7 @@ table! {
 ///
 /// This should be updated whenever the schema is changed. For more details,
 /// refer to: schema/crdb/README.adoc
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(13, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(14, 0, 0);
 
 allow_tables_to_appear_in_same_query!(
     system_update,
diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs
index ba572901c6..4c82aa5d23 100644
--- a/nexus/db-model/src/sled.rs
+++ b/nexus/db-model/src/sled.rs
@@ -8,7 +8,7 @@ use crate::ipv6;
 use crate::schema::{physical_disk, service, sled, zpool};
 use chrono::{DateTime, Utc};
 use db_macros::Asset;
-use nexus_types::{external_api::views, identity::Asset};
+use nexus_types::{external_api::shared, external_api::views, identity::Asset};
 use std::net::Ipv6Addr;
 use std::net::SocketAddrV6;
 use uuid::Uuid;
@@ -88,7 +88,7 @@ impl From<Sled> for views::Sled {
         Self {
             identity: sled.identity(),
             rack_id: sled.rack_id,
-            baseboard: views::Baseboard {
+            baseboard: shared::Baseboard {
                 serial: sled.serial_number,
                 part: sled.part_number,
                 revision: sled.revision,
diff --git a/nexus/db-model/src/sled_underlay_subnet_allocation.rs b/nexus/db-model/src/sled_underlay_subnet_allocation.rs
new file mode 100644
index 0000000000..4da0bea669
--- /dev/null
+++ b/nexus/db-model/src/sled_underlay_subnet_allocation.rs
@@ -0,0 +1,16 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::schema::sled_underlay_subnet_allocation;
+use uuid::Uuid;
+
+/// Underlay allocation for a sled added to an initialized rack
+#[derive(Queryable, Insertable, Debug, Clone, Selectable)]
+#[diesel(table_name = sled_underlay_subnet_allocation)]
+pub struct SledUnderlaySubnetAllocation {
+    pub rack_id: Uuid,
+    pub sled_id: Uuid,
+    pub subnet_octet: i16,
+    pub hw_baseboard_id: Uuid,
+}
diff --git a/nexus/db-model/src/switch.rs b/nexus/db-model/src/switch.rs
index c9db100b0a..159888d91e 100644
--- a/nexus/db-model/src/switch.rs
+++ b/nexus/db-model/src/switch.rs
@@ -2,7 +2,7 @@ use super::Generation;
 use crate::schema::switch;
 use chrono::{DateTime, Utc};
 use db_macros::Asset;
-use nexus_types::{external_api::views, identity::Asset};
+use nexus_types::{external_api::shared, external_api::views, identity::Asset};
 use uuid::Uuid;
 
 /// Baseboard information about a switch.
@@ -57,7 +57,7 @@ impl From<Switch> for views::Switch {
         Self {
             identity: switch.identity(),
             rack_id: switch.rack_id,
-            baseboard: views::Baseboard {
+            baseboard: shared::Baseboard {
                 serial: switch.serial_number,
                 part: switch.part_number,
                 revision: switch.revision,
diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs
index 28a438629e..31b24a7e75 100644
--- a/nexus/db-queries/src/db/datastore/inventory.rs
+++ b/nexus/db-queries/src/db/datastore/inventory.rs
@@ -7,6 +7,7 @@ use crate::authz;
 use crate::context::OpContext;
 use crate::db;
 use crate::db::error::public_error_from_diesel;
+use crate::db::error::public_error_from_diesel_lookup;
 use crate::db::error::ErrorHandler;
 use crate::db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
 use crate::db::TransactionError;
@@ -21,6 +22,7 @@ use diesel::ExpressionMethods;
 use diesel::IntoSql;
 use diesel::JoinOnDsl;
 use diesel::NullableExpressionMethods;
+use diesel::OptionalExtension;
 use diesel::QueryDsl;
 use diesel::Table;
 use futures::future::BoxFuture;
@@ -42,9 +44,12 @@ use nexus_db_model::SpType;
 use nexus_db_model::SpTypeEnum;
 use nexus_db_model::SwCaboose;
 use nexus_db_model::SwRotPage;
+use nexus_types::inventory::BaseboardId;
 use nexus_types::inventory::Collection;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::InternalContext;
+use omicron_common::api::external::LookupType;
+use omicron_common::api::external::ResourceType;
 use omicron_common::bail_unless;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
@@ -910,30 +915,62 @@ impl DataStore {
         Ok(())
     }
 
+    // Find the primary key for `hw_baseboard_id` given a `BaseboardId`
+    pub async fn find_hw_baseboard_id(
+        &self,
+        opctx: &OpContext,
+        baseboard_id: BaseboardId,
+    ) -> Result<Uuid, Error> {
+        opctx.authorize(authz::Action::Read, &authz::INVENTORY).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        use db::schema::hw_baseboard_id::dsl;
+        dsl::hw_baseboard_id
+            .filter(dsl::serial_number.eq(baseboard_id.serial_number.clone()))
+            .filter(dsl::part_number.eq(baseboard_id.part_number.clone()))
+            .select(dsl::id)
+            .first_async::<Uuid>(&*conn)
+            .await
+            .map_err(|e| {
+                public_error_from_diesel_lookup(
+                    e,
+                    ResourceType::Sled,
+                    &LookupType::ByCompositeId(format!("{baseboard_id:?}")),
+                )
+            })
+    }
+
     /// Attempt to read the latest collection while limiting queries to `limit`
     /// records
+    ///
+    /// If there aren't any collections, return `Ok(None)`.
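+    ///
+    /// A caller can treat `None` as "no inventory has been collected yet".
+    /// For example (a sketch mirroring the caller added in
+    /// `nexus/src/app/rack.rs` later in this patch):
+    ///
+    /// ```ignore
+    /// let Some(collection) = datastore
+    ///     .inventory_get_latest_collection(opctx, limit)
+    ///     .await?
+    /// else {
+    ///     return Ok(vec![]);
+    /// };
+    /// ```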
     pub async fn inventory_get_latest_collection(
         &self,
         opctx: &OpContext,
         limit: NonZeroU32,
-    ) -> Result<Collection, Error> {
+    ) -> Result<Option<Collection>, Error> {
         opctx.authorize(authz::Action::Read, &authz::INVENTORY).await?;
         let conn = self.pool_connection_authorized(opctx).await?;
         use db::schema::inv_collection::dsl;
         let collection_id = dsl::inv_collection
             .select(dsl::id)
             .order_by(dsl::time_started.desc())
-            .limit(1)
             .first_async::<Uuid>(&*conn)
             .await
+            .optional()
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
 
-        self.inventory_collection_read_all_or_nothing(
-            opctx,
-            collection_id,
-            limit,
-        )
-        .await
+        let Some(collection_id) = collection_id else {
+            return Ok(None);
+        };
+
+        Ok(Some(
+            self.inventory_collection_read_all_or_nothing(
+                opctx,
+                collection_id,
+                limit,
+            )
+            .await?,
+        ))
     }
 
     /// Attempt to read the given collection while limiting queries to `limit`
@@ -1335,9 +1372,11 @@ mod test {
     use nexus_inventory::examples::Representative;
     use nexus_test_utils::db::test_setup_database;
     use nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL;
+    use nexus_types::inventory::BaseboardId;
     use nexus_types::inventory::CabooseWhich;
     use nexus_types::inventory::Collection;
     use nexus_types::inventory::RotPageWhich;
+    use omicron_common::api::external::Error;
     use omicron_test_utils::dev;
     use std::num::NonZeroU32;
     use uuid::Uuid;
@@ -1393,6 +1432,24 @@ mod test {
         }
     }
 
+    #[tokio::test]
+    async fn test_find_hw_baseboard_id_missing_returns_not_found() {
+        let logctx =
+            dev::test_setup_log("find_hw_baseboard_id_missing_returns_not_found");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+        let baseboard_id = BaseboardId {
+            serial_number: "some-serial".into(),
+            part_number: "some-part".into(),
+        };
+        let err = datastore
+            .find_hw_baseboard_id(&opctx, baseboard_id)
+            .await
+            .unwrap_err();
+        assert!(matches!(err, Error::ObjectNotFound { .. }));
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
     /// Tests inserting several collections, reading them back, and making sure
     /// they look the same.
    #[tokio::test]
diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs
index 2cc5880470..e11377f11a 100644
--- a/nexus/db-queries/src/db/datastore/rack.rs
+++ b/nexus/db-queries/src/db/datastore/rack.rs
@@ -41,6 +41,7 @@ use nexus_db_model::InitialDnsGroup;
 use nexus_db_model::PasswordHashString;
 use nexus_db_model::SiloUser;
 use nexus_db_model::SiloUserPasswordHash;
+use nexus_db_model::SledUnderlaySubnetAllocation;
 use nexus_types::external_api::params as external_params;
 use nexus_types::external_api::shared;
 use nexus_types::external_api::shared::IdentityType;
@@ -55,6 +56,7 @@ use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::LookupType;
 use omicron_common::api::external::ResourceType;
 use omicron_common::api::external::UpdateResult;
+use omicron_common::bail_unless;
 use std::net::IpAddr;
 use uuid::Uuid;
 
@@ -214,6 +216,126 @@ impl DataStore {
         Ok(())
     }
 
+    // Return the subnet for the rack
+    pub async fn rack_subnet(
+        &self,
+        opctx: &OpContext,
+        rack_id: Uuid,
+    ) -> Result<IpNetwork, Error> {
+        opctx.authorize(authz::Action::Read, &authz::FLEET).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        use db::schema::rack::dsl;
+        // It's safe to unwrap the returned `rack_subnet` because
+        // we filter on `rack_subnet.is_not_null()`
+        let subnet = dsl::rack
+            .filter(dsl::id.eq(rack_id))
+            .filter(dsl::rack_subnet.is_not_null())
+            .select(dsl::rack_subnet)
+            .first_async::<Option<IpNetwork>>(&*conn)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+        match subnet {
+            Some(subnet) => Ok(subnet),
+            None => Err(Error::internal_error(&format!(
+                "DB Error(bug): returned a null subnet for {rack_id}"
+            ))),
+        }
+    }
+
+    /// Allocate a rack subnet octet to a given sled
+    ///
+    /// 1. Find the existing allocations
+    /// 2. Calculate the new allocation
+    /// 3. Save the new allocation, if there isn't one for the given
+    ///    `hw_baseboard_id`
+    /// 4. Return the new allocation
+    ///
+    // TODO: This could all actually be done in SQL using a `next_item` query.
+    // See https://github.com/oxidecomputer/omicron/issues/4544
+    pub async fn allocate_sled_underlay_subnet_octets(
+        &self,
+        opctx: &OpContext,
+        rack_id: Uuid,
+        hw_baseboard_id: Uuid,
+    ) -> Result<SledUnderlaySubnetAllocation, Error> {
+        // Fetch all the existing allocations via self.rack_id
+        let allocations = self.rack_subnet_allocations(opctx, rack_id).await?;
+
+        // Calculate the allocation for the new sled by choosing the minimum
+        // octet. The returned allocations are ordered by octet, so we will know
+        // when we have a free one. However, if we already have an allocation
+        // for the given sled then reuse that one.
+        const MIN_SUBNET_OCTET: i16 = 33;
+        let mut new_allocation = SledUnderlaySubnetAllocation {
+            rack_id,
+            sled_id: Uuid::new_v4(),
+            subnet_octet: MIN_SUBNET_OCTET,
+            hw_baseboard_id,
+        };
+        let mut allocation_already_exists = false;
+        for allocation in allocations {
+            if allocation.hw_baseboard_id == new_allocation.hw_baseboard_id {
+                // We already have an allocation for this sled.
+                new_allocation = allocation;
+                allocation_already_exists = true;
+                break;
+            }
+            if allocation.subnet_octet == new_allocation.subnet_octet {
+                bail_unless!(
+                    new_allocation.subnet_octet < 255,
+                    "Too many sled subnets allocated"
+                );
+                new_allocation.subnet_octet += 1;
+            }
+        }
+
+        // Write the new allocation row to CRDB. The UNIQUE constraint
+        // on `subnet_octet` will prevent dueling administrators from reusing
+        // allocations when sleds are being added. We will need another
+        // mechanism à la generation numbers when we must interleave additions
+        // and removals of sleds.
+        if !allocation_already_exists {
+            self.sled_subnet_allocation_insert(opctx, &new_allocation).await?;
+        }
+
+        Ok(new_allocation)
+    }
+
+    /// Return all current underlay allocations for the rack.
+    ///
+    /// Order allocations by `subnet_octet`
+    pub async fn rack_subnet_allocations(
+        &self,
+        opctx: &OpContext,
+        rack_id: Uuid,
+    ) -> Result<Vec<SledUnderlaySubnetAllocation>, Error> {
+        opctx.authorize(authz::Action::Read, &authz::FLEET).await?;
+        use db::schema::sled_underlay_subnet_allocation::dsl as subnet_dsl;
+        subnet_dsl::sled_underlay_subnet_allocation
+            .filter(subnet_dsl::rack_id.eq(rack_id))
+            .select(SledUnderlaySubnetAllocation::as_select())
+            .order_by(subnet_dsl::subnet_octet.asc())
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// Store a new sled subnet allocation in the database
+    pub async fn sled_subnet_allocation_insert(
+        &self,
+        opctx: &OpContext,
+        allocation: &SledUnderlaySubnetAllocation,
+    ) -> Result<(), Error> {
+        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
+        use db::schema::sled_underlay_subnet_allocation::dsl;
+        diesel::insert_into(dsl::sled_underlay_subnet_allocation)
+            .values(allocation.clone())
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+        Ok(())
+    }
+
     // The following methods which return a `TxnError` take a `conn` parameter
     // which comes from the transaction created in `rack_set_initialized`.
 
@@ -1518,4 +1640,136 @@ mod test {
         db.cleanup().await.unwrap();
         logctx.cleanup_successful();
     }
+
+    #[tokio::test]
+    async fn rack_sled_subnet_allocations() {
+        let logctx = dev::test_setup_log("rack_sled_subnet_allocations");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let rack_id = Uuid::new_v4();
+
+        // Ensure we get an empty list when there are no allocations
+        let allocations =
+            datastore.rack_subnet_allocations(&opctx, rack_id).await.unwrap();
+        assert!(allocations.is_empty());
+
+        // Add 5 allocations
+        for i in 0..5i16 {
+            let allocation = SledUnderlaySubnetAllocation {
+                rack_id,
+                sled_id: Uuid::new_v4(),
+                subnet_octet: 33 + i,
+                hw_baseboard_id: Uuid::new_v4(),
+            };
+            datastore
+                .sled_subnet_allocation_insert(&opctx, &allocation)
+                .await
+                .unwrap();
+        }
+
+        // List all 5 allocations
+        let allocations =
+            datastore.rack_subnet_allocations(&opctx, rack_id).await.unwrap();
+
+        assert_eq!(5, allocations.len());
+
+        // Try to add another allocation for the same octet, but with a distinct
+        // sled_id. Ensure we get an error due to a unique constraint.
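+        // (Specifically the `UNIQUE` constraint on `subnet_octet` in the
+        // `sled_underlay_subnet_allocation` table; see
+        // schema/crdb/14.0.0/up1.sql.)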
+        let mut should_fail_allocation = SledUnderlaySubnetAllocation {
+            rack_id,
+            sled_id: Uuid::new_v4(),
+            subnet_octet: 37,
+            hw_baseboard_id: Uuid::new_v4(),
+        };
+        let _err = datastore
+            .sled_subnet_allocation_insert(&opctx, &should_fail_allocation)
+            .await
+            .unwrap_err();
+
+        // Adding an allocation for the same {rack_id, sled_id} pair fails
+        // the second time, even with a distinct subnet_octet
+        let mut allocation = should_fail_allocation.clone();
+        allocation.subnet_octet = 38;
+        datastore
+            .sled_subnet_allocation_insert(&opctx, &allocation)
+            .await
+            .unwrap();
+
+        should_fail_allocation.subnet_octet = 39;
+        should_fail_allocation.hw_baseboard_id = Uuid::new_v4();
+        let _err = datastore
+            .sled_subnet_allocation_insert(&opctx, &should_fail_allocation)
+            .await
+            .unwrap_err();
+
+        // Allocations outside our expected range fail
+        let mut should_fail_allocation = SledUnderlaySubnetAllocation {
+            rack_id,
+            sled_id: Uuid::new_v4(),
+            subnet_octet: 32,
+            hw_baseboard_id: Uuid::new_v4(),
+        };
+        let _err = datastore
+            .sled_subnet_allocation_insert(&opctx, &should_fail_allocation)
+            .await
+            .unwrap_err();
+        should_fail_allocation.subnet_octet = 256;
+        let _err = datastore
+            .sled_subnet_allocation_insert(&opctx, &should_fail_allocation)
+            .await
+            .unwrap_err();
+
+        // We should have 6 allocations
+        let allocations =
+            datastore.rack_subnet_allocations(&opctx, rack_id).await.unwrap();
+
+        assert_eq!(6, allocations.len());
+        assert_eq!(
+            vec![33, 34, 35, 36, 37, 38],
+            allocations.iter().map(|a| a.subnet_octet).collect::<Vec<_>>()
+        );
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn allocate_sled_underlay_subnet_octets() {
+        let logctx =
+            dev::test_setup_log("allocate_sled_underlay_subnet_octets");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let rack_id = Uuid::new_v4();
+
+        let mut allocated_octets = vec![];
+        for _ in 0..5 {
+            allocated_octets.push(
+                datastore
+                    .allocate_sled_underlay_subnet_octets(
+                        &opctx,
+                        rack_id,
+                        Uuid::new_v4(),
+                    )
+                    .await
+                    .unwrap()
+                    .subnet_octet,
+            );
+        }
+
+        let expected = vec![33, 34, 35, 36, 37];
+        assert_eq!(expected, allocated_octets);
+
+        // We should have 5 allocations in the DB, sorted appropriately
+        let allocations =
+            datastore.rack_subnet_allocations(&opctx, rack_id).await.unwrap();
+        assert_eq!(5, allocations.len());
+        assert_eq!(
+            expected,
+            allocations.iter().map(|a| a.subnet_octet).collect::<Vec<_>>()
+        );
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
 }
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs
index 1c2e49e260..984ece2d0c 100644
--- a/nexus/src/app/rack.rs
+++ b/nexus/src/app/rack.rs
@@ -10,7 +10,7 @@ use crate::external_api::params::CertificateCreate;
 use crate::external_api::shared::ServiceUsingCertificate;
 use crate::internal_api::params::RackInitializationRequest;
 use gateway_client::types::SpType;
-use ipnetwork::IpNetwork;
+use ipnetwork::{IpNetwork, Ipv6Network};
 use nexus_db_model::DnsGroup;
 use nexus_db_model::InitialDnsGroup;
 use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed};
@@ -29,13 +29,14 @@ use nexus_types::external_api::params::{
     AddressLotCreate, LoopbackAddressCreate, Route, SiloCreate,
     SwitchPortSettingsCreate,
 };
+use nexus_types::external_api::shared::Baseboard;
 use nexus_types::external_api::shared::FleetRole;
 use nexus_types::external_api::shared::SiloIdentityMode;
 use nexus_types::external_api::shared::SiloRole;
+use nexus_types::external_api::shared::UninitializedSled;
 use nexus_types::external_api::views;
-use nexus_types::external_api::views::Baseboard;
-use nexus_types::external_api::views::UninitializedSled;
 use nexus_types::internal_api::params::DnsRecord;
+use omicron_common::address::{get_64_subnet, Ipv6Subnet, RACK_PREFIX};
 use omicron_common::api::external::AddressLotKind;
 use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
@@ -45,7 +46,10 @@ use omicron_common::api::external::LookupResult;
 use omicron_common::api::external::Name;
 use omicron_common::api::external::NameOrId;
 use omicron_common::api::internal::shared::ExternalPortDiscovery;
+use sled_agent_client::types::AddSledRequest;
 use sled_agent_client::types::EarlyNetworkConfigBody;
+use sled_agent_client::types::StartSledAgentRequest;
+use sled_agent_client::types::StartSledAgentRequestBody;
 use sled_agent_client::types::{
     BgpConfig, BgpPeerConfig, EarlyNetworkConfig, PortConfigV1,
     RackNetworkConfigV1, RouteConfig as SledRouteConfig,
@@ -584,20 +588,7 @@ impl super::Nexus {
         if rack.rack_subnet.is_some() {
             return Ok(());
         }
-        let addr = self
-            .sled_list(opctx, &DataPageParams::max_page())
-            .await?
-            .get(0)
-            .ok_or(Error::InternalError {
-                internal_message: "no sleds at time of bootstore sync".into(),
-            })?
-            .address();
-
-        let sa = sled_agent_client::Client::new(
-            &format!("http://{}", addr),
-            self.log.clone(),
-        );
-
+        let sa = self.get_any_sled_agent(opctx).await?;
         let result = sa
             .read_network_bootstore_config_cache()
             .await
@@ -619,7 +610,7 @@ impl super::Nexus {
         opctx: &OpContext,
     ) -> Result<EarlyNetworkConfig, Error> {
         let rack = self.rack_lookup(opctx, &self.rack_id).await?;
-        let subnet = rack.subnet()?;
+        let subnet = rack_subnet(rack.rack_subnet)?;
 
         let db_ports = self.active_port_settings(opctx).await?;
         let mut ports = Vec::new();
@@ -726,18 +717,28 @@ impl super::Nexus {
         &self,
         opctx: &OpContext,
     ) -> ListResultVec<UninitializedSled> {
+        debug!(self.log, "Getting latest collection");
         // Grab the SPs from the last collection
         let limit = NonZeroU32::new(50).unwrap();
         let collection = self
             .db_datastore
             .inventory_get_latest_collection(opctx, limit)
             .await?;
+
+        // There can't be any uninitialized sleds we know about
+        // if there is no inventory.
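+        // (That is, `inventory_get_latest_collection` returned `None`.)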
+        let Some(collection) = collection else {
+            return Ok(vec![]);
+        };
+
         let pagparams = DataPageParams {
             marker: None,
             direction: dropshot::PaginationOrder::Descending,
             // TODO: This limit is only suitable for a single sled cluster
             limit: NonZeroU32::new(32).unwrap(),
         };
+
+        debug!(self.log, "Listing sleds");
         let sleds = self.db_datastore.sled_list(opctx, &pagparams).await?;
 
         let mut uninitialized_sleds: Vec<UninitializedSled> = collection
@@ -767,4 +768,106 @@ impl super::Nexus {
         uninitialized_sleds.retain(|s| !sled_baseboards.contains(&s.baseboard));
         Ok(uninitialized_sleds)
     }
+
+    /// Add a sled to an initialized rack
+    pub(crate) async fn add_sled_to_initialized_rack(
+        &self,
+        opctx: &OpContext,
+        sled: UninitializedSled,
+    ) -> Result<(), Error> {
+        let baseboard_id = sled.baseboard.clone().into();
+        let hw_baseboard_id =
+            self.db_datastore.find_hw_baseboard_id(opctx, baseboard_id).await?;
+
+        let subnet = self.db_datastore.rack_subnet(opctx, sled.rack_id).await?;
+        let rack_subnet =
+            Ipv6Subnet::<RACK_PREFIX>::from(rack_subnet(Some(subnet))?);
+
+        let allocation = self
+            .db_datastore
+            .allocate_sled_underlay_subnet_octets(
+                opctx,
+                sled.rack_id,
+                hw_baseboard_id,
+            )
+            .await?;
+
+        // Convert the baseboard as necessary
+        let baseboard = sled_agent_client::types::Baseboard::Gimlet {
+            identifier: sled.baseboard.serial.clone(),
+            model: sled.baseboard.part.clone(),
+            revision: sled.baseboard.revision,
+        };
+
+        // Make the call to sled-agent
+        let req = AddSledRequest {
+            sled_id: baseboard,
+            start_request: StartSledAgentRequest {
+                generation: 0,
+                schema_version: 1,
+                body: StartSledAgentRequestBody {
+                    id: allocation.sled_id,
+                    rack_id: allocation.rack_id,
+                    use_trust_quorum: true,
+                    is_lrtq_learner: true,
+                    subnet: sled_agent_client::types::Ipv6Subnet {
+                        net: get_64_subnet(
+                            rack_subnet,
+                            allocation.subnet_octet.try_into().unwrap(),
+                        )
+                        .net()
+                        .into(),
+                    },
+                },
+            },
+        };
+        let sa = self.get_any_sled_agent(opctx).await?;
+        sa.add_sled_to_initialized_rack(&req).await.map_err(|e| {
+            Error::InternalError {
+                internal_message: format!(
+                    "failed to add sled with baseboard {:?} to rack {}: {e}",
+                    sled.baseboard, allocation.rack_id
+                ),
+            }
+        })?;
+
+        Ok(())
+    }
+
+    async fn get_any_sled_agent(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<sled_agent_client::Client, Error> {
+        let addr = self
+            .sled_list(opctx, &DataPageParams::max_page())
+            .await?
+            .get(0)
+            .ok_or(Error::InternalError {
+                internal_message: "no sled agents available".into(),
+            })?
+            .address();
+
+        Ok(sled_agent_client::Client::new(
+            &format!("http://{}", addr),
+            self.log.clone(),
+        ))
+    }
+}
+
+pub fn rack_subnet(
+    rack_subnet: Option<IpNetwork>,
+) -> Result<Ipv6Network, Error> {
+    match rack_subnet {
+        Some(IpNetwork::V6(subnet)) => Ok(subnet),
+        Some(IpNetwork::V4(_)) => {
+            return Err(Error::InternalError {
+                internal_message: "rack subnet not IPv6".into(),
+            })
+        }
+        None => {
+            return Err(Error::InternalError {
+                internal_message: "rack subnet not set".into(),
+            })
+        }
+    }
+}
diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs
index 8189c0a93d..c2931f1441 100644
--- a/nexus/src/app/sled.rs
+++ b/nexus/src/app/sled.rs
@@ -38,6 +38,9 @@ impl super::Nexus {
     // TODO-robustness we should have a limit on how many sled agents there can
     // be (for graceful degradation at large scale).
+    //
+    // TODO-multisled: This should not use the rack_id for the given nexus,
+    // unless the DNS lookups at sled-agent are only for rack-local nexuses.
     pub(crate) async fn upsert_sled(
         &self,
         opctx: &OpContext,
diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs
index 428632bcf5..78f675c28a 100644
--- a/nexus/src/external_api/http_entrypoints.rs
+++ b/nexus/src/external_api/http_entrypoints.rs
@@ -6,10 +6,11 @@
 use super::{
     console_api, device_auth, params,
+    shared::UninitializedSled,
     views::{
         self, Certificate, Group, IdentityProvider, Image, IpPool, IpPoolRange,
-        PhysicalDisk, Project, Rack, Role, Silo, Sled, Snapshot, SshKey,
-        UninitializedSled, User, UserBuiltin, Vpc, VpcRouter, VpcSubnet,
+        PhysicalDisk, Project, Rack, Role, Silo, Sled, Snapshot, SshKey, User,
+        UserBuiltin, Vpc, VpcRouter, VpcSubnet,
     },
 };
 use crate::external_api::shared;
@@ -223,6 +224,7 @@ pub(crate) fn external_api() -> NexusApiDescription {
         api.register(switch_list)?;
         api.register(switch_view)?;
         api.register(uninitialized_sled_list)?;
+        api.register(add_sled_to_initialized_rack)?;
 
         api.register(user_builtin_list)?;
         api.register(user_builtin_view)?;
@@ -4402,6 +4404,31 @@ async fn uninitialized_sled_list(
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
 
+/// Add a sled to an initialized rack
+//
+// TODO: In the future this should really be a PUT request, once we resolve
+// https://github.com/oxidecomputer/omicron/issues/4494. It should also
+// explicitly be tied to a rack via a `rack_id` path param. For now we assume
+// we are only operating on single rack systems.
+#[endpoint {
+    method = POST,
+    path = "/v1/system/hardware/sleds",
+    tags = ["system/hardware"]
+}]
+async fn add_sled_to_initialized_rack(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    sled: TypedBody<UninitializedSled>,
+) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+    let apictx = rqctx.context();
+    let nexus = &apictx.nexus;
+    let handler = async {
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        nexus.add_sled_to_initialized_rack(&opctx, sled.into_inner()).await?;
+        Ok(HttpResponseUpdatedNoContent())
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
 // Sleds
 
 /// List sleds
diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs
index 64790c49c2..5dfdcc151d 100644
--- a/nexus/tests/integration_tests/endpoints.rs
+++ b/nexus/tests/integration_tests/endpoints.rs
@@ -21,8 +21,10 @@ use nexus_test_utils::SLED_AGENT_UUID;
 use nexus_test_utils::SWITCH_UUID;
 use nexus_types::external_api::params;
 use nexus_types::external_api::shared;
+use nexus_types::external_api::shared::Baseboard;
 use nexus_types::external_api::shared::IpRange;
 use nexus_types::external_api::shared::Ipv4Range;
+use nexus_types::external_api::shared::UninitializedSled;
 use omicron_common::api::external::AddressLotKind;
 use omicron_common::api::external::ByteCount;
 use omicron_common::api::external::IdentityMetadataCreateParams;
@@ -39,6 +41,7 @@ use omicron_test_utils::certificates::CertificateChain;
 use std::net::IpAddr;
 use std::net::Ipv4Addr;
 use std::str::FromStr;
+use uuid::Uuid;
 
 lazy_static! {
     pub static ref SLED_INSTANCES_URL: String =
         format!("/v1/system/hardware/sleds/{}/instances", SLED_AGENT_UUID);
 
+    pub static ref DEMO_UNINITIALIZED_SLED: UninitializedSled = UninitializedSled {
+        baseboard: Baseboard {
+            serial: "demo-serial".to_string(),
+            part: "demo-part".to_string(),
+            revision: 6
+        },
+        rack_id: Uuid::new_v4(),
+        cubby: 1
+    };
+
     // Global policy
     pub static ref SYSTEM_POLICY_URL: &'static str = "/v1/system/policy";
 
@@ -1577,7 +1590,9 @@ lazy_static! {
             url: "/v1/system/hardware/sleds",
             visibility: Visibility::Public,
             unprivileged_access: UnprivilegedAccess::None,
-            allowed_methods: vec![AllowedMethod::Get],
+            allowed_methods: vec![AllowedMethod::Get, AllowedMethod::Post(
+                serde_json::to_value(&*DEMO_UNINITIALIZED_SLED).unwrap()
+            )],
         },
 
         VerifyEndpoint {
diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs
index 2c191f27ae..9f77223871 100644
--- a/nexus/tests/integration_tests/rack.rs
+++ b/nexus/tests/integration_tests/rack.rs
@@ -10,8 +10,14 @@ use nexus_test_utils::http_testing::RequestBuilder;
 use nexus_test_utils::TEST_SUITE_PASSWORD;
 use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::params;
+use nexus_types::external_api::shared::UninitializedSled;
 use nexus_types::external_api::views::Rack;
+use nexus_types::internal_api::params::Baseboard;
+use nexus_types::internal_api::params::SledAgentStartupInfo;
+use nexus_types::internal_api::params::SledRole;
+use omicron_common::api::external::ByteCount;
 use omicron_nexus::TestInterfaces;
+use uuid::Uuid;
 
 type ControlPlaneTestContext =
     nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
 
@@ -77,3 +83,63 @@ async fn test_rack_initialization(cptestctx: &ControlPlaneTestContext) {
         )
         .await;
 }
+
+#[nexus_test]
+async fn test_uninitialized_sled_list(cptestctx: &ControlPlaneTestContext) {
+    let internal_client = &cptestctx.internal_client;
+    let external_client = &cptestctx.external_client;
+    let list_url = "/v1/system/hardware/uninitialized-sleds";
+    let mut uninitialized_sleds =
+        NexusRequest::object_get(external_client, &list_url)
+            .authn_as(AuthnMode::PrivilegedUser)
+            .execute()
+            .await
+            .expect("failed to get uninitialized sleds")
+            .parsed_body::<Vec<UninitializedSled>>()
+            .unwrap();
+    debug!(cptestctx.logctx.log, "{:#?}", uninitialized_sleds);
+
+    // There are currently two fake sim gimlets created in the latest inventory
+    // collection as part of test setup.
+    assert_eq!(2, uninitialized_sleds.len());
+
+    // Insert one of these fake sleds into the `sled` table.
+    // Just pick some random fields other than `baseboard`
+    let baseboard = uninitialized_sleds.pop().unwrap().baseboard;
+    let sled_uuid = Uuid::new_v4();
+    let sa = SledAgentStartupInfo {
+        sa_address: "[fd00:1122:3344:01::1]:8080".parse().unwrap(),
+        role: SledRole::Gimlet,
+        baseboard: Baseboard {
+            serial_number: baseboard.serial,
+            part_number: baseboard.part,
+            revision: baseboard.revision,
+        },
+        usable_hardware_threads: 32,
+        usable_physical_ram: ByteCount::from_gibibytes_u32(100),
+        reservoir_size: ByteCount::from_mebibytes_u32(100),
+    };
+    internal_client
+        .make_request(
+            Method::POST,
+            format!("/sled-agents/{sled_uuid}").as_str(),
+            Some(&sa),
+            StatusCode::NO_CONTENT,
+        )
+        .await
+        .unwrap();
+
+    // Ensure there's only one uninitialized sled remaining, and it's not
+    // the one that was just added into the `sled` table
+    let uninitialized_sleds_2 =
+        NexusRequest::object_get(external_client, &list_url)
+            .authn_as(AuthnMode::PrivilegedUser)
+            .execute()
+            .await
+            .expect("failed to get uninitialized sleds")
+            .parsed_body::<Vec<UninitializedSled>>()
+            .unwrap();
+    debug!(cptestctx.logctx.log, "{:#?}", uninitialized_sleds_2);
+    assert_eq!(1, uninitialized_sleds_2.len());
+    assert_eq!(uninitialized_sleds, uninitialized_sleds_2);
+}
diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt
index 7f0c30c471..dd387ab979 100644
--- a/nexus/tests/output/nexus_tags.txt
+++ b/nexus/tests/output/nexus_tags.txt
@@ -110,6 +110,7 @@ snapshot_view                    GET      /v1/snapshots/{snapshot}
 
 API operations found with tag "system/hardware"
 OPERATION ID                     METHOD   URL PATH
+add_sled_to_initialized_rack     POST     /v1/system/hardware/sleds
 networking_switch_port_apply_settings POST /v1/system/hardware/switch-port/{port}/settings
 networking_switch_port_clear_settings DELETE /v1/system/hardware/switch-port/{port}/settings
 networking_switch_port_list      GET      /v1/system/hardware/switch-port
diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs
index 48fbb9c10d..a4c5ae1e62 100644
--- a/nexus/types/src/external_api/shared.rs
+++ b/nexus/types/src/external_api/shared.rs
@@ -245,6 +245,42 @@ pub enum UpdateableComponentType {
     HostOmicron,
 }
 
+/// Properties that uniquely identify an Oxide hardware component
+#[derive(
+    Clone,
+    Debug,
+    Serialize,
+    Deserialize,
+    JsonSchema,
+    PartialOrd,
+    Ord,
+    PartialEq,
+    Eq,
+)]
+pub struct Baseboard {
+    pub serial: String,
+    pub part: String,
+    pub revision: i64,
+}
+
+/// A sled that has not been added to an initialized rack yet
+#[derive(
+    Clone,
+    Debug,
+    Serialize,
+    Deserialize,
+    JsonSchema,
+    PartialOrd,
+    Ord,
+    PartialEq,
+    Eq,
+)]
+pub struct UninitializedSled {
+    pub baseboard: Baseboard,
+    pub rack_id: Uuid,
+    pub cubby: u16,
+}
+
 #[cfg(test)]
 mod test {
     use super::Policy;
diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs
index b34fc7a542..9dfe36d63b 100644
--- a/nexus/types/src/external_api/views.rs
+++ b/nexus/types/src/external_api/views.rs
@@ -5,7 +5,7 @@
 //! Views are response bodies, most of which are public lenses onto DB models.
 use crate::external_api::shared::{
-    self, IpKind, IpRange, ServiceUsingCertificate,
+    self, Baseboard, IpKind, IpRange, ServiceUsingCertificate,
 };
 use crate::identity::AssetIdentityMetadata;
 use api_identity::ObjectIdentity;
@@ -274,44 +274,8 @@ pub struct Rack {
     pub identity: AssetIdentityMetadata,
 }
 
-/// View of a sled that has not been added to an initialized rack yet
-#[derive(
-    Clone,
-    Debug,
-    Serialize,
-    Deserialize,
-    JsonSchema,
-    PartialOrd,
-    Ord,
-    PartialEq,
-    Eq,
-)]
-pub struct UninitializedSled {
-    pub baseboard: Baseboard,
-    pub rack_id: Uuid,
-    pub cubby: u16,
-}
-
 // FRUs
 
-/// Properties that uniquely identify an Oxide hardware component
-#[derive(
-    Clone,
-    Debug,
-    Serialize,
-    Deserialize,
-    JsonSchema,
-    PartialOrd,
-    Ord,
-    PartialEq,
-    Eq,
-)]
-pub struct Baseboard {
-    pub serial: String,
-    pub part: String,
-    pub revision: i64,
-}
-
 // SLEDS
 
 /// An operator's view of a Sled.
diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs
index c0991ebb17..bc25e8d4bd 100644
--- a/nexus/types/src/internal_api/params.rs
+++ b/nexus/types/src/internal_api/params.rs
@@ -25,7 +25,7 @@ use uuid::Uuid;
 ///
 /// Note that this may change if the sled is physically moved
 /// within the rack.
-#[derive(Serialize, Deserialize, JsonSchema)]
+#[derive(Serialize, Deserialize, JsonSchema, Debug)]
 #[serde(rename_all = "snake_case")]
 pub enum SledRole {
     /// The sled is a general compute sled.
@@ -45,7 +45,7 @@ pub struct Baseboard {
 }
 
 /// Sent by a sled agent on startup to Nexus to request further instruction
-#[derive(Serialize, Deserialize, JsonSchema)]
+#[derive(Serialize, Deserialize, Debug, JsonSchema)]
 pub struct SledAgentStartupInfo {
     /// The address of the sled agent's API endpoint
     pub sa_address: SocketAddrV6,
diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs
index 19c323d894..9401727162 100644
--- a/nexus/types/src/inventory.rs
+++ b/nexus/types/src/inventory.rs
@@ -20,6 +20,8 @@ use std::sync::Arc;
 use strum::EnumIter;
 use uuid::Uuid;
 
+use crate::external_api::shared::Baseboard;
+
 /// Results of collecting hardware/software inventory from various Omicron
 /// components
 ///
@@ -131,6 +133,12 @@ pub struct BaseboardId {
     pub serial_number: String,
 }
 
+impl From<Baseboard> for BaseboardId {
+    fn from(value: Baseboard) -> Self {
+        BaseboardId { part_number: value.part, serial_number: value.serial }
+    }
+}
+
 /// Caboose contents found during a collection
 ///
 /// These are normalized in the database.
Each distinct `Caboose` is assigned a diff --git a/openapi/nexus.json b/openapi/nexus.json index 0d19e81d9a..704aa393db 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -3610,6 +3610,34 @@ "x-dropshot-pagination": { "required": [] } + }, + "post": { + "tags": [ + "system/hardware" + ], + "summary": "Add a sled to an initialized rack", + "operationId": "add_sled_to_initialized_rack", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UninitializedSled" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } } }, "/v1/system/hardware/sleds/{sled_id}": { @@ -13971,7 +13999,7 @@ ] }, "UninitializedSled": { - "description": "View of a sled that has not been added to an initialized rack yet", + "description": "A sled that has not been added to an initialized rack yet", "type": "object", "properties": { "baseboard": { diff --git a/schema/crdb/14.0.0/up1.sql b/schema/crdb/14.0.0/up1.sql new file mode 100644 index 0000000000..3bff831ceb --- /dev/null +++ b/schema/crdb/14.0.0/up1.sql @@ -0,0 +1,37 @@ +-- Table of all sled subnets allocated for sleds added to an already initialized +-- rack. The sleds in this table and their allocated subnets are created before +-- a sled is added to the `sled` table. Addition to the `sled` table occurs +-- after the sled is initialized and notifies Nexus about itself. +-- +-- For simplicity and space savings, this table doesn't actually contain the +-- full subnets for a given sled, but only the octet that extends a /56 rack +-- subnet to a /64 sled subnet. The rack subnet is maintained in the `rack` +-- table. +-- +-- This table does not include subnet octets allocated during RSS and therefore +-- all of the octets start at 33. This makes the data in this table purely additive +-- post-RSS, which also implies that we cannot re-use subnet octets if an original +-- sled that was part of RSS was removed from the cluster. +CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( + -- The physical identity of the sled + -- (foreign key into `hw_baseboard_id` table) + hw_baseboard_id UUID PRIMARY KEY, + + -- The rack to which a sled is being added + -- (foreign key into `rack` table) + -- + -- We require this because the sled is not yet part of the sled table when + -- we first allocate a subnet for it. + rack_id UUID NOT NULL, + + -- The sled to which a subnet is being allocated + -- + -- Eventually will be a foreign key into the `sled` table when the sled notifies nexus + -- about itself after initialization. + sled_id UUID NOT NULL, + + -- The octet that extends a /56 rack subnet to a /64 sled subnet + -- + -- Always between 33 and 255 inclusive + subnet_octet INT2 NOT NULL UNIQUE CHECK (subnet_octet BETWEEN 33 AND 255) +); diff --git a/schema/crdb/14.0.0/up2.sql b/schema/crdb/14.0.0/up2.sql new file mode 100644 index 0000000000..c3e18fa166 --- /dev/null +++ b/schema/crdb/14.0.0/up2.sql @@ -0,0 +1,5 @@ +-- Add an index which allows pagination by {rack_id, sled_id} pairs. 
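+--
+-- Because the index is UNIQUE, it also guarantees at most one subnet
+-- allocation per {rack_id, sled_id} pair, which the datastore tests rely on.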
+CREATE UNIQUE INDEX IF NOT EXISTS lookup_subnet_allocation_by_rack_and_sled ON omicron.public.sled_underlay_subnet_allocation ( + rack_id, + sled_id +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index fc3bc37fd7..728b084982 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -158,6 +158,51 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_resource_by_sled ON omicron.public.sled id ); + +-- Table of all sled subnets allocated for sleds added to an already initialized +-- rack. The sleds in this table and their allocated subnets are created before +-- a sled is added to the `sled` table. Addition to the `sled` table occurs +-- after the sled is initialized and notifies Nexus about itself. +-- +-- For simplicity and space savings, this table doesn't actually contain the +-- full subnets for a given sled, but only the octet that extends a /56 rack +-- subnet to a /64 sled subnet. The rack subnet is maintained in the `rack` +-- table. +-- +-- This table does not include subnet octets allocated during RSS and therefore +-- all of the octets start at 33. This makes the data in this table purely additive +-- post-RSS, which also implies that we cannot re-use subnet octets if an original +-- sled that was part of RSS was removed from the cluster. +CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( + -- The physical identity of the sled + -- (foreign key into `hw_baseboard_id` table) + hw_baseboard_id UUID PRIMARY KEY, + + -- The rack to which a sled is being added + -- (foreign key into `rack` table) + -- + -- We require this because the sled is not yet part of the sled table when + -- we first allocate a subnet for it. + rack_id UUID NOT NULL, + + -- The sled to which a subnet is being allocated + -- + -- Eventually will be a foreign key into the `sled` table when the sled notifies nexus + -- about itself after initialization. + sled_id UUID NOT NULL, + + -- The octet that extends a /56 rack subnet to a /64 sled subnet + -- + -- Always between 33 and 255 inclusive + subnet_octet INT2 NOT NULL UNIQUE CHECK (subnet_octet BETWEEN 33 AND 255) +); + +-- Add an index which allows pagination by {rack_id, sled_id} pairs. 
+CREATE UNIQUE INDEX IF NOT EXISTS lookup_subnet_allocation_by_rack_and_sled ON omicron.public.sled_underlay_subnet_allocation ( + rack_id, + sled_id +); + /* * Switches */ @@ -2952,7 +2997,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '13.0.0', NULL) + ( TRUE, NOW(), NOW(), '14.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From f03c7d5b460f149f626dd82bcf72cdc47d5a4552 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 01:39:26 +0000 Subject: [PATCH 017/186] Update Rust to v1.74.0 (#4543) Co-authored-by: Rain --- bootstore/src/schemes/v0/request_manager.rs | 2 +- common/src/api/external/mod.rs | 2 +- illumos-utils/src/running_zone.rs | 5 +++-- nexus/db-queries/src/db/queries/volume.rs | 7 ++++--- nexus/db-queries/src/db/saga_recovery.rs | 3 +-- oximeter/instruments/src/kstat/link.rs | 4 ++-- rust-toolchain.toml | 2 +- wicket/src/ui/widgets/popup.rs | 2 +- 8 files changed, 14 insertions(+), 13 deletions(-) diff --git a/bootstore/src/schemes/v0/request_manager.rs b/bootstore/src/schemes/v0/request_manager.rs index 780213430c..90466fdc07 100644 --- a/bootstore/src/schemes/v0/request_manager.rs +++ b/bootstore/src/schemes/v0/request_manager.rs @@ -109,7 +109,7 @@ impl RequestManager { let expiry = now + self.config.rack_init_timeout; let mut acks = InitAcks::default(); acks.expected = - packages.keys().cloned().filter(|id| id != &self.id).collect(); + packages.keys().filter(|&id| id != &self.id).cloned().collect(); let req = TrackableRequest::InitRack { rack_uuid, packages: packages.clone(), diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index adf661516a..3e58d1d4d4 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -409,7 +409,7 @@ impl SemverVersion { /// This is the official ECMAScript-compatible validation regex for /// semver: /// - const VALIDATION_REGEX: &str = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"; + const VALIDATION_REGEX: &'static str = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"; } impl JsonSchema for SemverVersion { diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index bdf7ed0cbf..ba8cd009e8 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -214,7 +214,7 @@ mod zenter { // the contracts used for this come from templates that define becoming // empty as a critical event. pub fn contract_reaper(log: Logger) { - const EVENT_PATH: &[u8] = b"/system/contract/process/pbundle"; + const EVENT_PATH: &'static [u8] = b"/system/contract/process/pbundle"; const CT_PR_EV_EMPTY: u64 = 1; let cpath = CString::new(EVENT_PATH).unwrap(); @@ -327,7 +327,8 @@ mod zenter { } impl Template { - const TEMPLATE_PATH: &[u8] = b"/system/contract/process/template\0"; + const TEMPLATE_PATH: &'static [u8] = + b"/system/contract/process/template\0"; // Constants related to how the contract below is managed. 
See // `usr/src/uts/common/sys/contract/process.h` in the illumos sources diff --git a/nexus/db-queries/src/db/queries/volume.rs b/nexus/db-queries/src/db/queries/volume.rs index 31882dca89..2c1a9af19b 100644 --- a/nexus/db-queries/src/db/queries/volume.rs +++ b/nexus/db-queries/src/db/queries/volume.rs @@ -412,10 +412,11 @@ pub struct DecreaseCrucibleResourceCountAndSoftDeleteVolume { } impl DecreaseCrucibleResourceCountAndSoftDeleteVolume { - const UPDATED_REGION_SNAPSHOTS_TABLE: &str = "updated_region_snapshots"; - const REGION_SNAPSHOTS_TO_CLEAN_UP_TABLE: &str = + const UPDATED_REGION_SNAPSHOTS_TABLE: &'static str = + "updated_region_snapshots"; + const REGION_SNAPSHOTS_TO_CLEAN_UP_TABLE: &'static str = "region_snapshots_to_clean_up"; - const UPDATED_VOLUME_TABLE: &str = "updated_volume"; + const UPDATED_VOLUME_TABLE: &'static str = "updated_volume"; pub fn new(volume_id: Uuid, snapshot_addrs: Vec) -> Self { Self { diff --git a/nexus/db-queries/src/db/saga_recovery.rs b/nexus/db-queries/src/db/saga_recovery.rs index f3eada1645..802093b889 100644 --- a/nexus/db-queries/src/db/saga_recovery.rs +++ b/nexus/db-queries/src/db/saga_recovery.rs @@ -143,8 +143,7 @@ where .await }); - let mut completion_futures = vec![]; - completion_futures.reserve(recovery_futures.len()); + let mut completion_futures = Vec::with_capacity(recovery_futures.len()); // Loads and resumes all sagas in serial. for recovery_future in recovery_futures { let saga_complete_future = recovery_future.await?; diff --git a/oximeter/instruments/src/kstat/link.rs b/oximeter/instruments/src/kstat/link.rs index d22ac60378..03397c4108 100644 --- a/oximeter/instruments/src/kstat/link.rs +++ b/oximeter/instruments/src/kstat/link.rs @@ -268,8 +268,8 @@ mod tests { } impl TestEtherstub { - const PFEXEC: &str = "/usr/bin/pfexec"; - const DLADM: &str = "/usr/sbin/dladm"; + const PFEXEC: &'static str = "/usr/bin/pfexec"; + const DLADM: &'static str = "/usr/sbin/dladm"; fn new() -> Self { let name = format!( "kstest{}0", diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 804ff08cce..65ee8a9912 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -4,5 +4,5 @@ # # We choose a specific toolchain (rather than "stable") for repeatability. The # intent is to keep this up-to-date with recently-released stable Rust. -channel = "1.73.0" +channel = "1.74.0" profile = "default" diff --git a/wicket/src/ui/widgets/popup.rs b/wicket/src/ui/widgets/popup.rs index 19d7aa18b1..fb8c0f1f24 100644 --- a/wicket/src/ui/widgets/popup.rs +++ b/wicket/src/ui/widgets/popup.rs @@ -464,7 +464,7 @@ pub fn draw_buttons( let button_rects = Layout::default() .direction(Direction::Horizontal) .horizontal_margin(2) - .constraints(constraints.as_ref()) + .constraints(constraints) .split(rect); let block = Block::default() From b9d8b8f9c3e8f4b33cf11b546b96b5fe134906eb Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 27 Nov 2023 19:08:38 -0800 Subject: [PATCH 018/186] [update-engine] fix GroupDisplayStats to avoid integer underflow (#4561) This could happen if an empty `EventReport` is passed in -- in that case we'd transition to `Running` but return `NotStarted`. Fix this by not transitioning `self.kind` to `Running` if we're going to return `NotStarted`. This does bloat up the code a little but I think is clearer overall. Thanks to @jgallagher for all the help debugging this! Also clean up some related logic and add tests. Fixes #4507. 
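To make the failure mode concrete, here is a minimal, self-contained
sketch (toy names, not the actual `GroupDisplayStats` fields) of how a
mis-reported transition leads to the underflow: the internal kind flips
to `Running` while the stats still count the report as `NotStarted`, so
when the update later terminates, the `running` counter is decremented
past zero.

```rust
// Toy illustration only -- these are hypothetical names, not the real
// update-engine types. The counters are `usize`, so subtracting past
// zero panics in debug builds and wraps to usize::MAX in release builds.
enum Tag {
    NotStarted,
    Running,
    Completed,
}

#[derive(Default)]
struct Stats {
    not_started: usize,
    running: usize,
    completed: usize,
}

impl Stats {
    fn bucket(&mut self, tag: Tag) -> &mut usize {
        match tag {
            Tag::NotStarted => &mut self.not_started,
            Tag::Running => &mut self.running,
            Tag::Completed => &mut self.completed,
        }
    }

    // Pre-fix shape: blindly move one report from the `before` bucket to
    // the `after` bucket for every result, even a no-op one.
    fn apply(&mut self, before: Tag, after: Tag) {
        *self.bucket(before) -= 1;
        *self.bucket(after) += 1;
    }
}

fn main() {
    let mut stats = Stats { not_started: 1, ..Default::default() };
    // An empty EventReport flips the internal kind to Running but reports
    // NotStarted -> NotStarted, so `running` is never incremented.
    stats.apply(Tag::NotStarted, Tag::NotStarted);
    // When the update later completes, the report says Running ->
    // Completed, and `running` (still 0) is decremented past zero.
    stats.apply(Tag::Running, Tag::Completed); // panics in debug builds
    println!("completed: {}", stats.completed);
}
```

The fix below sidesteps this in two ways: `apply_result` returns early
when `before == after`, and `add_event_report` no longer moves
`self.kind` to `Running` when the report is going to yield `NotStarted`.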
--- .../examples/update-engine-basic/display.rs | 1 + update-engine/src/buffer.rs | 228 +-------- update-engine/src/display/group_display.rs | 454 +++++++++++++++--- update-engine/src/test_utils.rs | 284 ++++++++++- wicket/src/cli/rack_update.rs | 1 + 5 files changed, 683 insertions(+), 285 deletions(-) diff --git a/update-engine/examples/update-engine-basic/display.rs b/update-engine/examples/update-engine-basic/display.rs index 122777211b..891bdce6d3 100644 --- a/update-engine/examples/update-engine-basic/display.rs +++ b/update-engine/examples/update-engine-basic/display.rs @@ -88,6 +88,7 @@ async fn display_group( slog::info!(log, "setting up display"); let mut display = GroupDisplay::new( + log, [ (GroupDisplayKey::Example, "example"), (GroupDisplayKey::Other, "other"), diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 6e0e66d6d0..36a0626963 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -1627,6 +1627,16 @@ pub enum TerminalKind { Aborted, } +impl fmt::Display for TerminalKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Completed => write!(f, "completed"), + Self::Failed => write!(f, "failed"), + Self::Aborted => write!(f, "aborted"), + } + } +} + impl ExecutionStatus { /// Returns the terminal status and the total amount of time elapsed, or /// None if the execution has not reached a terminal state. @@ -1671,17 +1681,13 @@ mod tests { use std::collections::HashSet; use anyhow::{bail, ensure, Context}; - use futures::StreamExt; use indexmap::IndexSet; use omicron_test_utils::dev::test_setup_log; use serde::{de::IntoDeserializer, Deserialize}; - use tokio::sync::mpsc; - use tokio_stream::wrappers::ReceiverStream; use crate::{ - events::{ProgressCounter, ProgressUnits, StepProgress}, - test_utils::TestSpec, - StepContext, StepSuccess, UpdateEngine, + events::ProgressCounter, + test_utils::{generate_test_events, GenerateTestEventsKind, TestSpec}, }; use super::*; @@ -1689,108 +1695,11 @@ mod tests { #[tokio::test] async fn test_buffer() { let logctx = test_setup_log("test_buffer"); - // The channel is big enough to contain all possible events. 
- let (sender, receiver) = mpsc::channel(512); - let engine: UpdateEngine = - UpdateEngine::new(&logctx.log, sender); - - engine - .new_step("foo".to_owned(), 1, "Step 1", move |_cx| async move { - StepSuccess::new(()).into() - }) - .register(); - - engine - .new_step("bar".to_owned(), 2, "Step 2", move |cx| async move { - for _ in 0..20 { - cx.send_progress(StepProgress::with_current_and_total( - 5, - 20, - ProgressUnits::BYTES, - Default::default(), - )) - .await; - - cx.send_progress(StepProgress::reset( - Default::default(), - "reset step 2", - )) - .await; - - cx.send_progress(StepProgress::retry("retry step 2")).await; - } - StepSuccess::new(()).into() - }) - .register(); - - engine - .new_step( - "nested".to_owned(), - 3, - "Step 3 (this is nested)", - move |parent_cx| async move { - parent_cx - .with_nested_engine(|engine| { - define_nested_engine(&parent_cx, engine); - Ok(()) - }) - .await - .expect_err("this is expected to fail"); - - StepSuccess::new(()).into() - }, - ) - .register(); - - let log = logctx.log.clone(); - engine - .new_step( - "remote-nested".to_owned(), - 20, - "Step 4 (remote nested)", - move |cx| async move { - let (sender, mut receiver) = mpsc::channel(16); - let mut engine = UpdateEngine::new(&log, sender); - define_remote_nested_engine(&mut engine, 20); - - let mut buffer = EventBuffer::default(); - - let mut execute_fut = std::pin::pin!(engine.execute()); - let mut execute_done = false; - loop { - tokio::select! { - res = &mut execute_fut, if !execute_done => { - res.expect("remote nested engine completed successfully"); - execute_done = true; - } - Some(event) = receiver.recv() => { - // Generate complete reports to ensure deduping - // happens within StepContexts. - buffer.add_event(event); - cx.send_nested_report(buffer.generate_report()).await?; - } - else => { - break; - } - } - } - - StepSuccess::new(()).into() - }, - ) - .register(); - - // The step index here (100) is large enough to be higher than all nested - // steps. - engine - .new_step("baz".to_owned(), 100, "Step 5", move |_cx| async move { - StepSuccess::new(()).into() - }) - .register(); - - engine.execute().await.expect("execution successful"); - let generated_events: Vec<_> = - ReceiverStream::new(receiver).collect().await; + let generated_events = generate_test_events( + &logctx.log, + GenerateTestEventsKind::Completed, + ) + .await; let test_cx = BufferTestContext::new(generated_events); @@ -2417,71 +2326,6 @@ mod tests { } } - fn define_nested_engine<'a>( - parent_cx: &'a StepContext, - engine: &mut UpdateEngine<'a, TestSpec>, - ) { - engine - .new_step( - "nested-foo".to_owned(), - 4, - "Nested step 1", - move |cx| async move { - parent_cx - .send_progress(StepProgress::with_current_and_total( - 1, - 3, - "steps", - Default::default(), - )) - .await; - cx.send_progress( - StepProgress::progress(Default::default()), - ) - .await; - StepSuccess::new(()).into() - }, - ) - .register(); - - engine - .new_step::<_, _, ()>( - "nested-bar".to_owned(), - 5, - "Nested step 2 (fails)", - move |cx| async move { - // This is used by NestedProgressCheck below. 
- parent_cx - .send_progress(StepProgress::with_current_and_total( - 2, - 3, - "steps", - Default::default(), - )) - .await; - - cx.send_progress(StepProgress::with_current( - 50, - "units", - Default::default(), - )) - .await; - - parent_cx - .send_progress(StepProgress::with_current_and_total( - 3, - 3, - "steps", - Default::default(), - )) - .await; - - bail!("failing step") - }, - ) - .register(); - } - #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum NestedProgressCheck { Initial, @@ -2530,42 +2374,4 @@ mod tests { ); } } - - fn define_remote_nested_engine( - engine: &mut UpdateEngine<'_, TestSpec>, - start_id: usize, - ) { - engine - .new_step( - "nested-foo".to_owned(), - start_id + 1, - "Nested step 1", - move |cx| async move { - cx.send_progress( - StepProgress::progress(Default::default()), - ) - .await; - StepSuccess::new(()).into() - }, - ) - .register(); - - engine - .new_step::<_, _, ()>( - "nested-bar".to_owned(), - start_id + 2, - "Nested step 2", - move |cx| async move { - cx.send_progress(StepProgress::with_current( - 20, - "units", - Default::default(), - )) - .await; - - StepSuccess::new(()).into() - }, - ) - .register(); - } } diff --git a/update-engine/src/display/group_display.rs b/update-engine/src/display/group_display.rs index 0d50489a9f..cfd37aac16 100644 --- a/update-engine/src/display/group_display.rs +++ b/update-engine/src/display/group_display.rs @@ -30,6 +30,7 @@ use super::{ pub struct GroupDisplay { // We don't need to add any buffering here because we already write data to // the writer in a line-buffered fashion (see Self::write_events). + log: slog::Logger, writer: W, max_width: usize, // This is set to the highest value of root_total_elapsed seen from any event reports. @@ -45,6 +46,7 @@ impl GroupDisplay { /// /// The function passed in is expected to create a writer. pub fn new( + log: &slog::Logger, keys_and_prefixes: impl IntoIterator, writer: W, ) -> Self @@ -70,6 +72,7 @@ impl GroupDisplay { let not_started = single_states.len(); Self { + log: log.new(slog::o!("component" => "GroupDisplay")), writer, max_width, // This creates the stopwatch in the stopped state with duration 0 -- i.e. a minimal @@ -84,6 +87,7 @@ impl GroupDisplay { /// Creates a new `GroupDisplay` with the provided report keys, using the /// `Display` impl to obtain the respective prefixes. pub fn new_with_display( + log: &slog::Logger, keys: impl IntoIterator, writer: W, ) -> Self @@ -91,6 +95,7 @@ impl GroupDisplay { K: fmt::Display, { Self::new( + log, keys.into_iter().map(|k| { let prefix = k.to_string(); (k, prefix) @@ -144,7 +149,30 @@ impl GroupDisplay { TokioSw::with_elapsed_started(root_total_elapsed); } } + self.stats.apply_result(result); + + if result.before != result.after { + slog::info!( + self.log, + "add_event_report caused state transition"; + "prefix" => &state.prefix, + "before" => %result.before, + "after" => %result.after, + "current_stats" => ?self.stats, + "root_total_elapsed" => ?result.root_total_elapsed, + ); + } else { + slog::trace!( + self.log, + "add_event_report called, state did not change"; + "prefix" => &state.prefix, + "state" => %result.before, + "current_stats" => ?self.stats, + "root_total_elapsed" => ?result.root_total_elapsed, + ); + } + Ok(()) } else { Err(UnknownReportKey {}) @@ -179,7 +207,7 @@ impl GroupDisplay { } } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct GroupDisplayStats { /// The total number of reports. 
pub total: usize, @@ -236,18 +264,9 @@ impl GroupDisplayStats { } fn apply_result(&mut self, result: AddEventReportResult) { - // Process result.after first to avoid integer underflow. - match result.after { - SingleStateTag::NotStarted => self.not_started += 1, - SingleStateTag::Running => self.running += 1, - SingleStateTag::Terminal(TerminalKind::Completed) => { - self.completed += 1 - } - SingleStateTag::Terminal(TerminalKind::Failed) => self.failed += 1, - SingleStateTag::Terminal(TerminalKind::Aborted) => { - self.aborted += 1 - } - SingleStateTag::Overwritten => self.overwritten += 1, + if result.before == result.after { + // Nothing to do. + return; } match result.before { @@ -262,6 +281,19 @@ impl GroupDisplayStats { } SingleStateTag::Overwritten => self.overwritten -= 1, } + + match result.after { + SingleStateTag::NotStarted => self.not_started += 1, + SingleStateTag::Running => self.running += 1, + SingleStateTag::Terminal(TerminalKind::Completed) => { + self.completed += 1 + } + SingleStateTag::Terminal(TerminalKind::Failed) => self.failed += 1, + SingleStateTag::Terminal(TerminalKind::Aborted) => { + self.aborted += 1 + } + SingleStateTag::Overwritten => self.overwritten += 1, + } } fn format_line( @@ -336,92 +368,139 @@ impl SingleState { &mut self, event_report: EventReport, ) -> AddEventReportResult { - let before = match &self.kind { + match &mut self.kind { SingleStateKind::NotStarted { .. } => { - self.kind = SingleStateKind::Running { - event_buffer: EventBuffer::new(8), + // We're starting a new update. + let before = SingleStateTag::NotStarted; + let mut event_buffer = EventBuffer::default(); + let (after, root_total_elapsed) = + match Self::apply_report(&mut event_buffer, event_report) { + ApplyReportResult::NotStarted => { + // This means that the event report was empty. Don't + // update `self.kind`. + (SingleStateTag::NotStarted, None) + } + ApplyReportResult::Running(root_total_elapsed) => { + self.kind = + SingleStateKind::Running { event_buffer }; + (SingleStateTag::Running, Some(root_total_elapsed)) + } + ApplyReportResult::Terminal(info) => { + let terminal_kind = info.kind; + let root_total_elapsed = info.root_total_elapsed; + + self.kind = SingleStateKind::Terminal { + info, + pending_event_buffer: Some(event_buffer), + }; + ( + SingleStateTag::Terminal(terminal_kind), + root_total_elapsed, + ) + } + ApplyReportResult::Overwritten => { + self.kind = SingleStateKind::Overwritten { + displayed: false, + }; + (SingleStateTag::Overwritten, None) + } + }; + + AddEventReportResult { before, after, root_total_elapsed } + } + SingleStateKind::Running { event_buffer } => { + // We're in the middle of an update. + let before = SingleStateTag::Running; + let (after, root_total_elapsed) = match Self::apply_report( + event_buffer, + event_report, + ) { + ApplyReportResult::NotStarted => { + // This is an illegal state transition: once a + // non-empty event report has been received, the + // event buffer never goes back to the NotStarted + // state. + unreachable!("illegal state transition from Running to NotStarted") + } + ApplyReportResult::Running(root_total_elapsed) => { + (SingleStateTag::Running, Some(root_total_elapsed)) + } + ApplyReportResult::Terminal(info) => { + let terminal_kind = info.kind; + let root_total_elapsed = info.root_total_elapsed; + + // Grab the event buffer so we can store it in the + // Terminal state below. 
+ let event_buffer = std::mem::replace( + event_buffer, + EventBuffer::new(0), + ); + + self.kind = SingleStateKind::Terminal { + info, + pending_event_buffer: Some(event_buffer), + }; + ( + SingleStateTag::Terminal(terminal_kind), + root_total_elapsed, + ) + } + ApplyReportResult::Overwritten => { + self.kind = + SingleStateKind::Overwritten { displayed: false }; + (SingleStateTag::Overwritten, None) + } }; - SingleStateTag::NotStarted + AddEventReportResult { before, after, root_total_elapsed } } - SingleStateKind::Running { .. } => SingleStateTag::Running, - SingleStateKind::Terminal { info, .. } => { // Once we've reached a terminal state, we don't record any more // events. - return AddEventReportResult::unchanged( + AddEventReportResult::unchanged( SingleStateTag::Terminal(info.kind), info.root_total_elapsed, - ); + ) } SingleStateKind::Overwritten { .. } => { // This update has already completed -- assume that the event // buffer is for a new update, which we don't show. - return AddEventReportResult::unchanged( + AddEventReportResult::unchanged( SingleStateTag::Overwritten, None, - ); + ) } - }; - - let SingleStateKind::Running { event_buffer } = &mut self.kind else { - unreachable!("other branches were handled above"); - }; + } + } + /// The internal logic used by [`Self::add_event_report`]. + fn apply_report( + event_buffer: &mut EventBuffer, + event_report: EventReport, + ) -> ApplyReportResult { if let Some(root_execution_id) = event_buffer.root_execution_id() { if event_report.root_execution_id != Some(root_execution_id) { // The report is for a different execution ID -- assume that // this event is completed and mark our current execution as // completed. - self.kind = SingleStateKind::Overwritten { displayed: false }; - return AddEventReportResult { - before, - after: SingleStateTag::Overwritten, - root_total_elapsed: None, - }; + return ApplyReportResult::Overwritten; } } event_buffer.add_event_report(event_report); - let (after, max_total_elapsed) = - match event_buffer.root_execution_summary() { - Some(summary) => { - match summary.execution_status { - ExecutionStatus::NotStarted => { - (SingleStateTag::NotStarted, None) - } - ExecutionStatus::Running { - root_total_elapsed: max_total_elapsed, - .. - } => (SingleStateTag::Running, Some(max_total_elapsed)), - ExecutionStatus::Terminal(info) => { - // Grab the event buffer to store it in the terminal state. - let event_buffer = std::mem::replace( - event_buffer, - EventBuffer::new(0), - ); - let terminal_kind = info.kind; - let root_total_elapsed = info.root_total_elapsed; - self.kind = SingleStateKind::Terminal { - info, - pending_event_buffer: Some(event_buffer), - }; - ( - SingleStateTag::Terminal(terminal_kind), - root_total_elapsed, - ) - } - } + match event_buffer.root_execution_summary() { + Some(summary) => match summary.execution_status { + ExecutionStatus::NotStarted => ApplyReportResult::NotStarted, + ExecutionStatus::Running { root_total_elapsed, .. } => { + ApplyReportResult::Running(root_total_elapsed) } - None => { - // We don't have a summary yet. - (SingleStateTag::NotStarted, None) + ExecutionStatus::Terminal(info) => { + ApplyReportResult::Terminal(info) } - }; - - AddEventReportResult { - before, - after, - root_total_elapsed: max_total_elapsed, + }, + None => { + // We don't have a summary yet. 
+ ApplyReportResult::NotStarted + } } } @@ -488,6 +567,7 @@ enum SingleStateKind { }, } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] struct AddEventReportResult { before: SingleStateTag, after: SingleStateTag, @@ -503,10 +583,238 @@ impl AddEventReportResult { } } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] enum SingleStateTag { NotStarted, Running, Terminal(TerminalKind), Overwritten, } + +impl fmt::Display for SingleStateTag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NotStarted => write!(f, "not started"), + Self::Running => write!(f, "running"), + Self::Terminal(kind) => write!(f, "{kind}"), + Self::Overwritten => write!(f, "overwritten"), + } + } +} + +#[derive(Clone, Debug)] +enum ApplyReportResult { + NotStarted, + Running(Duration), + Terminal(ExecutionTerminalInfo), + Overwritten, +} + +#[cfg(test)] +mod tests { + use omicron_test_utils::dev::test_setup_log; + + use super::*; + + use crate::test_utils::{generate_test_events, GenerateTestEventsKind}; + + #[tokio::test] + async fn test_stats() { + let logctx = test_setup_log("test_stats"); + // Generate three sets of events, one for each kind. + let generated_completed = generate_test_events( + &logctx.log, + GenerateTestEventsKind::Completed, + ) + .await; + let generated_failed = + generate_test_events(&logctx.log, GenerateTestEventsKind::Failed) + .await; + let generated_aborted = + generate_test_events(&logctx.log, GenerateTestEventsKind::Aborted) + .await; + + // Set up a `GroupDisplay` with three keys. + let mut group_display = GroupDisplay::new_with_display( + &logctx.log, + vec![ + GroupDisplayKey::Completed, + GroupDisplayKey::Failed, + GroupDisplayKey::Aborted, + GroupDisplayKey::Overwritten, + ], + std::io::stdout(), + ); + + let mut expected_stats = GroupDisplayStats { + total: 4, + not_started: 4, + running: 0, + completed: 0, + failed: 0, + aborted: 0, + overwritten: 0, + }; + assert_eq!(group_display.stats(), &expected_stats); + assert!(!expected_stats.is_terminal()); + assert!(!expected_stats.has_failures()); + + // Pass in an empty EventReport -- ensure that this doesn't move it to + // a Running state. + + group_display + .add_event_report( + &GroupDisplayKey::Completed, + EventReport::default(), + ) + .unwrap(); + assert_eq!(group_display.stats(), &expected_stats); + + // Pass in events one by one -- ensure that we're always in the running + // state until we've completed. + { + expected_stats.not_started -= 1; + expected_stats.running += 1; + + let n = generated_completed.len(); + + let mut buffer = EventBuffer::default(); + let mut last_seen = None; + + for (i, event) in + generated_completed.clone().into_iter().enumerate() + { + buffer.add_event(event); + let report = buffer.generate_report_since(&mut last_seen); + group_display + .add_event_report(&GroupDisplayKey::Completed, report) + .unwrap(); + if i == n - 1 { + // The last event should have moved us to the completed + // state. + expected_stats.running -= 1; + expected_stats.completed += 1; + } else { + // We should still be in the running state. + } + assert_eq!(group_display.stats(), &expected_stats); + assert!(!expected_stats.is_terminal()); + assert!(!expected_stats.has_failures()); + } + } + + // Pass in failed events, this time using buffer.generate_report() + // rather than buffer.generate_report_since(). 
+ { + expected_stats.not_started -= 1; + expected_stats.running += 1; + + let n = generated_failed.len(); + + let mut buffer = EventBuffer::default(); + for (i, event) in generated_failed.clone().into_iter().enumerate() { + buffer.add_event(event); + let report = buffer.generate_report(); + group_display + .add_event_report(&GroupDisplayKey::Failed, report) + .unwrap(); + if i == n - 1 { + // The last event should have moved us to the failed state. + expected_stats.running -= 1; + expected_stats.failed += 1; + assert!(expected_stats.has_failures()); + } else { + // We should still be in the running state. + assert!(!expected_stats.has_failures()); + } + assert_eq!(group_display.stats(), &expected_stats); + } + } + + // Pass in aborted events all at once. + { + expected_stats.not_started -= 1; + expected_stats.running += 1; + + let mut buffer = EventBuffer::default(); + for event in generated_aborted { + buffer.add_event(event); + } + let report = buffer.generate_report(); + group_display + .add_event_report(&GroupDisplayKey::Aborted, report) + .unwrap(); + // The aborted events should have moved us to the aborted state. + expected_stats.running -= 1; + expected_stats.aborted += 1; + assert_eq!(group_display.stats(), &expected_stats); + + // Try passing in one of the events that, if we were running, would + // cause us to move to an overwritten state. Ensure that that does + // not happen (i.e. expected_stats stays the same) + let mut buffer = EventBuffer::default(); + buffer.add_event(generated_failed.first().unwrap().clone()); + let report = buffer.generate_report(); + group_display + .add_event_report(&GroupDisplayKey::Aborted, report) + .unwrap(); + assert_eq!(group_display.stats(), &expected_stats); + } + + // For the overwritten state, pass in half of the completed events, and + // then pass in all of the failed events. + + { + expected_stats.not_started -= 1; + expected_stats.running += 1; + + let mut buffer = EventBuffer::default(); + let n = generated_completed.len() / 2; + for event in generated_completed.into_iter().take(n) { + buffer.add_event(event); + } + let report = buffer.generate_report(); + group_display + .add_event_report(&GroupDisplayKey::Overwritten, report) + .unwrap(); + assert_eq!(group_display.stats(), &expected_stats); + + // Now pass in a single failed event, which has a different + // execution ID. + let mut buffer = EventBuffer::default(); + buffer.add_event(generated_failed.first().unwrap().clone()); + let report = buffer.generate_report(); + group_display + .add_event_report(&GroupDisplayKey::Overwritten, report) + .unwrap(); + // The overwritten event should have moved us to the overwritten + // state. 
+ expected_stats.running -= 1; + expected_stats.overwritten += 1; + } + + assert!(expected_stats.has_failures()); + assert!(expected_stats.is_terminal()); + + logctx.cleanup_successful(); + } + + #[derive(Debug, Eq, PartialEq, Ord, PartialOrd)] + enum GroupDisplayKey { + Completed, + Failed, + Aborted, + Overwritten, + } + + impl fmt::Display for GroupDisplayKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Completed => write!(f, "completed"), + Self::Failed => write!(f, "failed"), + Self::Aborted => write!(f, "aborted"), + Self::Overwritten => write!(f, "overwritten"), + } + } + } +} diff --git a/update-engine/src/test_utils.rs b/update-engine/src/test_utils.rs index 0bacfbeb8d..b943d1ddfe 100644 --- a/update-engine/src/test_utils.rs +++ b/update-engine/src/test_utils.rs @@ -4,9 +4,16 @@ // Copyright 2023 Oxide Computer Company +use anyhow::bail; +use futures::StreamExt; use schemars::JsonSchema; +use tokio::sync::{mpsc, oneshot}; +use tokio_stream::wrappers::ReceiverStream; -use crate::{ExecutionId, StepSpec}; +use crate::{ + events::{Event, ProgressUnits, StepProgress}, + EventBuffer, ExecutionId, StepContext, StepSpec, StepSuccess, UpdateEngine, +}; #[derive(JsonSchema)] pub(crate) enum TestSpec {} @@ -27,3 +34,278 @@ pub(crate) static TEST_EXECUTION_UUID: &str = pub fn test_execution_id() -> ExecutionId { ExecutionId(TEST_EXECUTION_UUID.parse().expect("valid UUID")) } + +#[derive(Copy, Clone, Debug)] +pub(crate) enum GenerateTestEventsKind { + Completed, + Failed, + Aborted, +} + +pub(crate) async fn generate_test_events( + log: &slog::Logger, + kind: GenerateTestEventsKind, +) -> Vec> { + // The channel is big enough to contain all possible events. + let (sender, receiver) = mpsc::channel(512); + let engine = UpdateEngine::new(log, sender); + + match kind { + GenerateTestEventsKind::Completed => { + define_test_steps(log, &engine, LastStepOutcome::Completed); + engine.execute().await.expect("execution successful"); + } + GenerateTestEventsKind::Failed => { + define_test_steps(log, &engine, LastStepOutcome::Failed); + engine.execute().await.expect_err("execution failed"); + } + GenerateTestEventsKind::Aborted => { + // In this case, the last step signals that it has been reached via + // sending a message over this channel, and then waits forever. We + // abort execution by calling into the AbortHandle. + let (sender, receiver) = oneshot::channel(); + define_test_steps(log, &engine, LastStepOutcome::Aborted(sender)); + let abort_handle = engine.abort_handle(); + let mut execute_fut = std::pin::pin!(engine.execute()); + let mut receiver = std::pin::pin!(receiver); + let mut receiver_done = false; + loop { + tokio::select! 
{ + res = &mut execute_fut => { + res.expect_err("execution should have been aborted, but completed successfully"); + break; + } + _ = &mut receiver, if !receiver_done => { + receiver_done = true; + abort_handle + .abort("test engine deliberately aborted") + .expect("engine should still be alive"); + } + } + } + } + } + + ReceiverStream::new(receiver).collect().await +} + +#[derive(Debug)] +enum LastStepOutcome { + Completed, + Failed, + Aborted(oneshot::Sender<()>), +} + +#[derive(Debug)] +enum Never {} + +fn define_test_steps( + log: &slog::Logger, + engine: &UpdateEngine, + last_step_outcome: LastStepOutcome, +) { + engine + .new_step("foo".to_owned(), 1, "Step 1", move |_cx| async move { + StepSuccess::new(()).into() + }) + .register(); + + engine + .new_step("bar".to_owned(), 2, "Step 2", move |cx| async move { + for _ in 0..20 { + cx.send_progress(StepProgress::with_current_and_total( + 5, + 20, + ProgressUnits::BYTES, + Default::default(), + )) + .await; + + cx.send_progress(StepProgress::reset( + Default::default(), + "reset step 2", + )) + .await; + + cx.send_progress(StepProgress::retry("retry step 2")).await; + } + StepSuccess::new(()).into() + }) + .register(); + + engine + .new_step( + "nested".to_owned(), + 3, + "Step 3 (this is nested)", + move |parent_cx| async move { + parent_cx + .with_nested_engine(|engine| { + define_nested_engine(&parent_cx, engine); + Ok(()) + }) + .await + .expect_err("this is expected to fail"); + + StepSuccess::new(()).into() + }, + ) + .register(); + + let log = log.clone(); + engine + .new_step( + "remote-nested".to_owned(), + 20, + "Step 4 (remote nested)", + move |cx| async move { + let (sender, mut receiver) = mpsc::channel(16); + let mut engine = UpdateEngine::new(&log, sender); + define_remote_nested_engine(&mut engine, 20); + + let mut buffer = EventBuffer::default(); + + let mut execute_fut = std::pin::pin!(engine.execute()); + let mut execute_done = false; + loop { + tokio::select! { + res = &mut execute_fut, if !execute_done => { + res.expect("remote nested engine completed successfully"); + execute_done = true; + } + Some(event) = receiver.recv() => { + // Generate complete reports to ensure deduping + // happens within StepContexts. + buffer.add_event(event); + cx.send_nested_report(buffer.generate_report()).await?; + } + else => { + break; + } + } + } + + StepSuccess::new(()).into() + }, + ) + .register(); + + // The step index here (100) is large enough to be higher than all nested + // steps. + engine + .new_step("baz".to_owned(), 100, "Step 5", move |_cx| async move { + match last_step_outcome { + LastStepOutcome::Completed => StepSuccess::new(()).into(), + LastStepOutcome::Failed => { + bail!("last step failed") + } + LastStepOutcome::Aborted(sender) => { + sender.send(()).expect("receiver should be alive"); + // The driver of the engine is responsible for aborting it + // at this point. 
+ std::future::pending::().await; + unreachable!("pending future can never resolve"); + } + } + }) + .register(); +} + +fn define_nested_engine<'a>( + parent_cx: &'a StepContext, + engine: &mut UpdateEngine<'a, TestSpec>, +) { + engine + .new_step( + "nested-foo".to_owned(), + 4, + "Nested step 1", + move |cx| async move { + parent_cx + .send_progress(StepProgress::with_current_and_total( + 1, + 3, + "steps", + Default::default(), + )) + .await; + cx.send_progress(StepProgress::progress(Default::default())) + .await; + StepSuccess::new(()).into() + }, + ) + .register(); + + engine + .new_step::<_, _, ()>( + "nested-bar".to_owned(), + 5, + "Nested step 2 (fails)", + move |cx| async move { + // This is used by NestedProgressCheck below. + parent_cx + .send_progress(StepProgress::with_current_and_total( + 2, + 3, + "steps", + Default::default(), + )) + .await; + + cx.send_progress(StepProgress::with_current( + 50, + "units", + Default::default(), + )) + .await; + + parent_cx + .send_progress(StepProgress::with_current_and_total( + 3, + 3, + "steps", + Default::default(), + )) + .await; + + bail!("failing step") + }, + ) + .register(); +} + +fn define_remote_nested_engine( + engine: &mut UpdateEngine<'_, TestSpec>, + start_id: usize, +) { + engine + .new_step( + "nested-foo".to_owned(), + start_id + 1, + "Nested step 1", + move |cx| async move { + cx.send_progress(StepProgress::progress(Default::default())) + .await; + StepSuccess::new(()).into() + }, + ) + .register(); + + engine + .new_step::<_, _, ()>( + "nested-bar".to_owned(), + start_id + 2, + "Nested step 2", + move |cx| async move { + cx.send_progress(StepProgress::with_current( + 20, + "units", + Default::default(), + )) + .await; + + StepSuccess::new(()).into() + }, + ) + .register(); +} diff --git a/wicket/src/cli/rack_update.rs b/wicket/src/cli/rack_update.rs index fa41fa7b8c..cac0f09ee5 100644 --- a/wicket/src/cli/rack_update.rs +++ b/wicket/src/cli/rack_update.rs @@ -174,6 +174,7 @@ async fn do_attach_to_updates( output: CommandOutput<'_>, ) -> Result<()> { let mut display = GroupDisplay::new_with_display( + &log, update_ids.iter().copied(), output.stderr, ); From 9dcc32d98ec9a9bc2c137c6b4ac77730ebe38c8f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 05:24:00 +0000 Subject: [PATCH 019/186] Update taiki-e/install-action digest to c1dd9c9 (#4562) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`8f354f3` -> `c1dd9c9`](https://togithub.com/taiki-e/install-action/compare/8f354f3...c1dd9c9) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d79c836fba..c006a41f35 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@8f354f35e51028c902e8ab954045e37739acf562 # v2 + uses: taiki-e/install-action@c1dd9c9e59427252db32b9ece987f4eebc3a021a # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 19a01c20253044b73e1cb8846fd8b6d77543fdf4 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 06:43:07 +0000 Subject: [PATCH 020/186] Update Rust crate percent-encoding to 2.3.1 (#4563) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e4588efbde..04d7a1374d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -282,7 +282,7 @@ p256 = "0.13" parse-display = "0.8.2" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } paste = "1.0.14" -percent-encoding = "2.3.0" +percent-encoding = "2.3.1" pem = "1.1" petgraph = "0.6.4" postgres-protocol = "0.6.6" From 55b39533cfe9a3f2fc1185adaa9c2118efaee6bf Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 10:01:57 -0800 Subject: [PATCH 021/186] Update Rust crate camino-tempfile to 1.1.1 (#4565) --- Cargo.lock | 40 ++++++++++++++++++++++++--------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 22 +++++++++++++-------- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 07f804b03d..76107c8f4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -324,7 +324,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4d45f362125ed144544e57b0ec6de8fd6a296d41a6252fc4a20c0cf12e9ed3a" dependencies = [ - "rustix 0.38.9", + "rustix 0.38.25", "tempfile", "windows-sys 0.48.0", ] @@ -754,9 +754,9 @@ dependencies = [ [[package]] name = "camino-tempfile" -version = "1.0.2" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ab15a83d13f75dbd86f082bdefd160b628476ef58d3b900a0ef74e001bb097" +checksum = "cb905055fa81e4d427f919b2cd0d76a998267de7d225ea767a1894743a5263c2" dependencies = [ "camino", "tempfile", @@ -2151,7 +2151,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" dependencies = [ "cfg-if 1.0.0", - "rustix 0.38.9", + "rustix 0.38.25", "windows-sys 0.48.0", ] @@ -3383,7 +3383,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.2", - "rustix 0.38.9", + "rustix 0.38.25", "windows-sys 0.48.0", ] @@ -3636,9 +3636,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" [[package]] name = "lock_api" @@ -4935,6 +4935,7 
@@ dependencies = [ "diesel", "digest", "either", + "errno", "flate2", "futures", "futures-channel", @@ -4979,7 +4980,7 @@ dependencies = [ "regex-syntax 0.8.2", "reqwest", "ring 0.16.20", - "rustix 0.38.9", + "rustix 0.38.25", "schemars", "semver 1.0.20", "serde", @@ -6421,6 +6422,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -6872,14 +6882,14 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.9" +version = "0.38.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bfe0f2582b4931a45d1fa608f8a8722e8b3c7ac54dd6d5f3b3212791fedef49" +checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" dependencies = [ "bitflags 2.4.0", "errno", "libc", - "linux-raw-sys 0.4.5", + "linux-raw-sys 0.4.11", "windows-sys 0.48.0", ] @@ -8170,14 +8180,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if 1.0.0", "fastrand", - "redox_syscall 0.3.5", - "rustix 0.38.9", + "redox_syscall 0.4.1", + "rustix 0.38.25", "windows-sys 0.48.0", ] diff --git a/Cargo.toml b/Cargo.toml index 04d7a1374d..239fb453dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,7 +160,7 @@ byteorder = "1.5.0" bytes = "1.5.0" bytesize = "1.3.0" camino = "1.1" -camino-tempfile = "1.0.2" +camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" chacha20poly1305 = "0.10.1" ciborium = "0.2.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1a289bd0cb..7757b4ad8b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -209,58 +209,64 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", 
default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } From 30d41911f3682e21f34ec041a651c3f206600894 Mon Sep 17 00:00:00 2001 From: liffy <629075+lifning@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:02:08 -0800 Subject: [PATCH 022/186] Refactor InstalledZone::install to use a builder pattern, per TODO. (#4325) Additionally, make a builder-factory with an option to create fake builders, in service of refactoring some things to enable some unit tests being written. 
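For orientation, here is a minimal, self-contained toy of the shape this
introduces (illustrative names only, not the real `illumos-utils` API):
a factory that carries an optional "fake" mode, builders that inherit
it, and an `install()` that either does the real work or short-circuits
to a stub so unit tests never have to touch actual zones.

```rust
// Toy sketch only -- the real ZoneBuilderFactory/ZoneBuilder have many
// more fields and setters; see the running_zone.rs hunk below.
#[derive(Clone, Default)]
struct Factory {
    fake: bool,
}

impl Factory {
    // Analogous to ZoneBuilderFactory::fake(): builders created from
    // this factory never perform a real install.
    fn fake() -> Self {
        Self { fake: true }
    }

    fn builder(&self) -> Builder {
        Builder { fake: self.fake, zone_type: None }
    }
}

#[derive(Default)]
struct Builder {
    fake: bool,
    zone_type: Option<String>,
}

impl Builder {
    fn with_zone_type(mut self, zone_type: &str) -> Self {
        self.zone_type = Some(zone_type.to_string());
        self
    }

    // Like the real install(), fail loudly when a required field was
    // never set rather than installing a misconfigured zone.
    fn install(self) -> Result<String, &'static str> {
        let zone_type = self.zone_type.ok_or("incomplete builder")?;
        if self.fake {
            return Ok(format!("fake-installed {zone_type}"));
        }
        Ok(format!("installed {zone_type}"))
    }
}

fn main() {
    let zone = Factory::fake().builder().with_zone_type("oxz_test").install();
    assert_eq!(zone.unwrap(), "fake-installed oxz_test");

    let err = Factory::default().builder().install();
    assert_eq!(err.unwrap_err(), "incomplete builder");
}
```

The real builder reports a missing field as
`InstallZoneError::IncompleteBuilder`, and `ZoneBuilderFactory::fake()`
backs the stubbed install with a temporary directory so callers can
still resolve paths like the site profile XML.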
--- Cargo.lock | 1 + illumos-utils/Cargo.toml | 1 + illumos-utils/src/running_zone.rs | 238 +++++++++++++++++++++++++---- sled-agent/src/instance.rs | 48 +++--- sled-agent/src/instance_manager.rs | 9 ++ sled-agent/src/services.rs | 41 ++--- sled-agent/src/sled_agent.rs | 2 + 7 files changed, 272 insertions(+), 68 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 76107c8f4e..108c8b182d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3072,6 +3072,7 @@ dependencies = [ "bhyve_api", "byteorder", "camino", + "camino-tempfile", "cfg-if 1.0.0", "crucible-smf", "futures", diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 497454e047..8296eace5c 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -11,6 +11,7 @@ async-trait.workspace = true bhyve_api.workspace = true byteorder.workspace = true camino.workspace = true +camino-tempfile.workspace = true cfg-if.workspace = true crucible-smf.workspace = true futures.workspace = true diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index ba8cd009e8..ea80a6d34b 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -11,10 +11,12 @@ use crate::opte::{Port, PortTicket}; use crate::svc::wait_for_service; use crate::zone::{AddressRequest, IPADM, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; +use camino_tempfile::Utf8TempDir; use ipnetwork::IpNetwork; use omicron_common::backoff; use slog::{error, info, o, warn, Logger}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::sync::Arc; #[cfg(target_os = "illumos")] use std::sync::OnceLock; #[cfg(target_os = "illumos")] @@ -1043,7 +1045,7 @@ pub struct ServiceProcess { pub log_file: Utf8PathBuf, } -/// Errors returned from [`InstalledZone::install`]. +/// Errors returned from [`ZoneBuilder::install`]. #[derive(thiserror::Error, Debug)] pub enum InstallZoneError { #[error("Cannot create '{zone}': failed to create control VNIC: {err}")] @@ -1063,6 +1065,9 @@ pub enum InstallZoneError { #[error("Failed to find zone image '{image}' from {paths:?}")] ImageNotFound { image: String, paths: Vec }, + + #[error("Attempted to call install() on underspecified ZoneBuilder")] + IncompleteBuilder, } pub struct InstalledZone { @@ -1119,24 +1124,208 @@ impl InstalledZone { &self.zonepath } - // TODO: This would benefit from a "builder-pattern" interface. - #[allow(clippy::too_many_arguments)] - pub async fn install( - log: &Logger, - underlay_vnic_allocator: &VnicAllocator, - zone_root_path: &Utf8Path, - zone_image_paths: &[Utf8PathBuf], - zone_type: &str, - unique_name: Option, - datasets: &[zone::Dataset], - filesystems: &[zone::Fs], - data_links: &[String], - devices: &[zone::Device], - opte_ports: Vec<(Port, PortTicket)>, - bootstrap_vnic: Option, - links: Vec, - limit_priv: Vec, - ) -> Result { + pub fn site_profile_xml_path(&self) -> Utf8PathBuf { + let mut path: Utf8PathBuf = self.zonepath().into(); + path.push("root/var/svc/profile/site.xml"); + path + } +} + +#[derive(Clone)] +pub struct FakeZoneBuilderConfig { + temp_dir: Arc, +} + +#[derive(Clone, Default)] +pub struct ZoneBuilderFactory { + // Why this is part of this builder/factory and not some separate builder + // type: At time of writing, to the best of my knowledge: + // - If we want builder pattern, we need to return some type of `Self`. + // - If we have a trait that returns `Self` type, we can't turn it into a + // trait object (i.e. Box). 
+ // - Plumbing concrete types as generics through every other type that + // needs to construct zones (and anything else with a lot of parameters) + // seems like a worse idea. + fake_cfg: Option, +} + +impl ZoneBuilderFactory { + /// For use in unit tests that don't require actual zone creation to occur. + pub fn fake() -> Self { + Self { + fake_cfg: Some(FakeZoneBuilderConfig { + temp_dir: Arc::new(Utf8TempDir::new().unwrap()), + }), + } + } + + /// Create a [ZoneBuilder] that inherits this factory's fakeness. + pub fn builder<'a>(&self) -> ZoneBuilder<'a> { + ZoneBuilder { fake_cfg: self.fake_cfg.clone(), ..Default::default() } + } +} + +/// Builder-pattern construct for creating an [InstalledZone]. +/// Created by [ZoneBuilderFactory]. +#[derive(Default)] +pub struct ZoneBuilder<'a> { + log: Option, + underlay_vnic_allocator: Option<&'a VnicAllocator>, + zone_root_path: Option<&'a Utf8Path>, + zone_image_paths: Option<&'a [Utf8PathBuf]>, + zone_type: Option<&'a str>, + unique_name: Option, // actually optional + datasets: Option<&'a [zone::Dataset]>, + filesystems: Option<&'a [zone::Fs]>, + data_links: Option<&'a [String]>, + devices: Option<&'a [zone::Device]>, + opte_ports: Option>, + bootstrap_vnic: Option, // actually optional + links: Option>, + limit_priv: Option>, + fake_cfg: Option, +} + +impl<'a> ZoneBuilder<'a> { + pub fn with_log(mut self, log: Logger) -> Self { + self.log = Some(log); + self + } + + pub fn with_underlay_vnic_allocator( + mut self, + vnic_allocator: &'a VnicAllocator, + ) -> Self { + self.underlay_vnic_allocator = Some(vnic_allocator); + self + } + + pub fn with_zone_root_path(mut self, root_path: &'a Utf8Path) -> Self { + self.zone_root_path = Some(root_path); + self + } + + pub fn with_zone_image_paths( + mut self, + image_paths: &'a [Utf8PathBuf], + ) -> Self { + self.zone_image_paths = Some(image_paths); + self + } + + pub fn with_zone_type(mut self, zone_type: &'a str) -> Self { + self.zone_type = Some(zone_type); + self + } + + pub fn with_unique_name(mut self, uuid: Uuid) -> Self { + self.unique_name = Some(uuid); + self + } + + pub fn with_datasets(mut self, datasets: &'a [zone::Dataset]) -> Self { + self.datasets = Some(datasets); + self + } + + pub fn with_filesystems(mut self, filesystems: &'a [zone::Fs]) -> Self { + self.filesystems = Some(filesystems); + self + } + + pub fn with_data_links(mut self, links: &'a [String]) -> Self { + self.data_links = Some(links); + self + } + + pub fn with_devices(mut self, devices: &'a [zone::Device]) -> Self { + self.devices = Some(devices); + self + } + + pub fn with_opte_ports(mut self, ports: Vec<(Port, PortTicket)>) -> Self { + self.opte_ports = Some(ports); + self + } + + pub fn with_bootstrap_vnic(mut self, vnic: Link) -> Self { + self.bootstrap_vnic = Some(vnic); + self + } + + pub fn with_links(mut self, links: Vec) -> Self { + self.links = Some(links); + self + } + + pub fn with_limit_priv(mut self, limit_priv: Vec) -> Self { + self.limit_priv = Some(limit_priv); + self + } + + fn fake_install(self) -> Result { + let zone = self + .zone_type + .ok_or(InstallZoneError::IncompleteBuilder)? + .to_string(); + let control_vnic = self + .underlay_vnic_allocator + .ok_or(InstallZoneError::IncompleteBuilder)? 
+ .new_control(None) + .map_err(move |err| InstallZoneError::CreateVnic { zone, err })?; + let fake_cfg = self.fake_cfg.unwrap(); + let temp_dir = fake_cfg.temp_dir.path().to_path_buf(); + (|| { + let full_zone_name = InstalledZone::get_zone_name( + self.zone_type?, + self.unique_name, + ); + let zonepath = temp_dir + .join(self.zone_root_path?.strip_prefix("/").unwrap()) + .join(&full_zone_name); + let iz = InstalledZone { + log: self.log?, + zonepath, + name: full_zone_name, + control_vnic, + bootstrap_vnic: self.bootstrap_vnic, + opte_ports: self.opte_ports?, + links: self.links?, + }; + let xml_path = iz.site_profile_xml_path().parent()?.to_path_buf(); + std::fs::create_dir_all(&xml_path) + .unwrap_or_else(|_| panic!("ZoneBuilder::fake_install couldn't create site profile xml path {:?}", xml_path)); + Some(iz) + })() + .ok_or(InstallZoneError::IncompleteBuilder) + } + + pub async fn install(self) -> Result { + if self.fake_cfg.is_some() { + return self.fake_install(); + } + + let Self { + log: Some(log), + underlay_vnic_allocator: Some(underlay_vnic_allocator), + zone_root_path: Some(zone_root_path), + zone_image_paths: Some(zone_image_paths), + zone_type: Some(zone_type), + unique_name, + datasets: Some(datasets), + filesystems: Some(filesystems), + data_links: Some(data_links), + devices: Some(devices), + opte_ports: Some(opte_ports), + bootstrap_vnic, + links: Some(links), + limit_priv: Some(limit_priv), + .. + } = self + else { + return Err(InstallZoneError::IncompleteBuilder); + }; + let control_vnic = underlay_vnic_allocator.new_control(None).map_err(|err| { InstallZoneError::CreateVnic { @@ -1145,7 +1334,8 @@ impl InstalledZone { } })?; - let full_zone_name = Self::get_zone_name(zone_type, unique_name); + let full_zone_name = + InstalledZone::get_zone_name(zone_type, unique_name); // Looks for the image within `zone_image_path`, in order. let image = format!("{}.tar.gz", zone_type); @@ -1183,7 +1373,7 @@ impl InstalledZone { net_device_names.dedup(); Zones::install_omicron_zone( - log, + &log, &zone_root_path, &full_zone_name, &zone_image_path, @@ -1210,12 +1400,6 @@ impl InstalledZone { links, }) } - - pub fn site_profile_xml_path(&self) -> Utf8PathBuf { - let mut path: Utf8PathBuf = self.zonepath().into(); - path.push("root/var/svc/profile/site.xml"); - path - } } /// Return true if the service with the given FMRI appears to be an diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index a6f022f5f2..c37f0ffde6 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -26,7 +26,7 @@ use futures::lock::{Mutex, MutexGuard}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::{DhcpCfg, PortManager}; -use illumos_utils::running_zone::{InstalledZone, RunningZone}; +use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::Zones; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; @@ -226,6 +226,9 @@ struct InstanceInner { // Storage resources storage: StorageHandle, + // Used to create propolis zones + zone_builder_factory: ZoneBuilderFactory, + // Object used to collect zone bundles from this instance when terminated. 
zone_bundler: ZoneBundler, @@ -611,6 +614,7 @@ impl Instance { port_manager, storage, zone_bundler, + zone_builder_factory, } = services; let mut dhcp_config = DhcpCfg { @@ -678,6 +682,7 @@ impl Instance { running_state: None, nexus_client, storage, + zone_builder_factory, zone_bundler, instance_ticket: ticket, }; @@ -904,31 +909,28 @@ impl Instance { .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)? .clone(); - let installed_zone = InstalledZone::install( - &inner.log, - &inner.vnic_allocator, - &root, - &["/opt/oxide".into()], - "propolis-server", - Some(*inner.propolis_id()), - // dataset= - &[], - // filesystems= - &[], - // data_links= - &[], - &[ + let installed_zone = inner + .zone_builder_factory + .builder() + .with_log(inner.log.clone()) + .with_underlay_vnic_allocator(&inner.vnic_allocator) + .with_zone_root_path(&root) + .with_zone_image_paths(&["/opt/oxide".into()]) + .with_zone_type("propolis-server") + .with_unique_name(*inner.propolis_id()) + .with_datasets(&[]) + .with_filesystems(&[]) + .with_data_links(&[]) + .with_devices(&[ zone::Device { name: "/dev/vmm/*".to_string() }, zone::Device { name: "/dev/vmmctl".to_string() }, zone::Device { name: "/dev/viona".to_string() }, - ], - opte_ports, - // physical_nic= - None, - vec![], - vec![], - ) - .await?; + ]) + .with_opte_ports(opte_ports) + .with_links(vec![]) + .with_limit_priv(vec![]) + .install() + .await?; let gateway = inner.port_manager.underlay_ip(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index fa40a876f0..c1b7e402a4 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -17,6 +17,7 @@ use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; +use illumos_utils::running_zone::ZoneBuilderFactory; use illumos_utils::vmm_reservoir; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; @@ -76,6 +77,7 @@ struct InstanceManagerInternal { port_manager: PortManager, storage: StorageHandle, zone_bundler: ZoneBundler, + zone_builder_factory: ZoneBuilderFactory, } pub(crate) struct InstanceManagerServices { @@ -84,6 +86,7 @@ pub(crate) struct InstanceManagerServices { pub port_manager: PortManager, pub storage: StorageHandle, pub zone_bundler: ZoneBundler, + pub zone_builder_factory: ZoneBuilderFactory, } /// All instances currently running on the sled. 
@@ -100,6 +103,7 @@ impl InstanceManager { port_manager: PortManager, storage: StorageHandle, zone_bundler: ZoneBundler, + zone_builder_factory: ZoneBuilderFactory, ) -> Result<InstanceManager, Error> { Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { @@ -113,6 +117,7 @@ impl InstanceManager { port_manager, storage, zone_bundler, + zone_builder_factory, }), }) } @@ -266,6 +271,10 @@ impl InstanceManager { port_manager: self.inner.port_manager.clone(), storage: self.inner.storage.clone(), zone_bundler: self.inner.zone_bundler.clone(), + zone_builder_factory: self + .inner + .zone_builder_factory + .clone(), }; let state = crate::instance::InstanceInitialState { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b87c91768b..2caa640e22 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -53,7 +53,7 @@ use illumos_utils::dladm::{ use illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{DhcpCfg, Port, PortManager, PortTicket}; use illumos_utils::running_zone::{ - InstalledZone, RunCommandError, RunningZone, + InstalledZone, RunCommandError, RunningZone, ZoneBuilderFactory, }; use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT; use illumos_utils::zone::AddressRequest; @@ -1103,23 +1103,28 @@ impl ServiceManager { .push(boot_zpool.dataset_mountpoint(INSTALL_DATASET)); } - let installed_zone = InstalledZone::install( - &self.inner.log, - &self.inner.underlay_vnic_allocator, - &request.root, - zone_image_paths.as_slice(), - &request.zone.zone_type.to_string(), - unique_name, - datasets.as_slice(), - &filesystems, - &data_links, - &devices, - opte_ports, - bootstrap_vnic, - links, - limit_priv, - ) - .await?; + let mut zone_builder = ZoneBuilderFactory::default().builder(); + if let Some(uuid) = unique_name { + zone_builder = zone_builder.with_unique_name(uuid); + } + if let Some(vnic) = bootstrap_vnic { + zone_builder = zone_builder.with_bootstrap_vnic(vnic); + } + let installed_zone = zone_builder + .with_log(self.inner.log.clone()) + .with_underlay_vnic_allocator(&self.inner.underlay_vnic_allocator) + .with_zone_root_path(&request.root) + .with_zone_image_paths(zone_image_paths.as_slice()) + .with_zone_type(&request.zone.zone_type.to_string()) + .with_datasets(datasets.as_slice()) + .with_filesystems(&filesystems) + .with_data_links(&data_links) + .with_devices(&devices) + .with_opte_ports(opte_ports) + .with_links(links) + .with_limit_priv(limit_priv) + .install() + .await?; // TODO(https://github.com/oxidecomputer/omicron/issues/1898): // diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index cfa8c5d7ca..f5b71106cd 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -68,6 +68,7 @@ use std::sync::Arc; use tokio::sync::oneshot; use uuid::Uuid; +use illumos_utils::running_zone::ZoneBuilderFactory; #[cfg(not(test))] use illumos_utils::{dladm::Dladm, zone::Zones}; #[cfg(test)] @@ -382,6 +383,7 @@ impl SledAgent { port_manager.clone(), storage_manager.clone(), long_running_task_handles.zone_bundler.clone(), + ZoneBuilderFactory::default(), )?; // Configure the VMM reservoir as either a percentage of DRAM or as an From 91b0261ec2446ef74bb7934536784fe65a40ce2c Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 13:35:57 -0800 Subject: [PATCH 023/186] Update maghemite to 579592b (#4567) --- package-manifest.toml | 8 ++++---- tools/maghemite_ddm_openapi_version | 2 +-
tools/maghemite_mg_openapi_version | 2 +- tools/maghemite_mgd_checksums | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index ca96341f2a..26c45f0ff7 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -425,7 +425,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" +source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/maghemite.sha256.txt source.sha256 = "38851c79c85d53e997db748520fb27c82299ce7e58a550e35646a548498f1271" @@ -441,7 +441,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" +source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm.sha256.txt source.sha256 = "8cd94e9a6f6175081ce78f0281085a08a5306cde453d8e21deb28050945b1d88" @@ -456,10 +456,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" +source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm.sha256.txt -source.sha256 = "c4a7a626c84a28de3d2c6bfd85592bda2abad8cf5b41b2ce90b9c03904ccd3df" +source.sha256 = "82aa1ca1d7701b2221c442d58f912be59798258d574effcb866ffab22753cf38" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 76bdb9ca92..f60ea76380 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="12b392be94ff93abc3017bf2610a3b18e2174a2d" +COMMIT="579592bf474ec4b86805ada60c1b920b3beef5a7" SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index d6d1788cbc..649db53f6e 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="12b392be94ff93abc3017bf2610a3b18e2174a2d" +COMMIT="579592bf474ec4b86805ada60c1b920b3beef5a7" SHA2="6c1fab8d5028b52a161d8bf02aae47844699cdc5f7b28e1ac519fc4ec1ab3971" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 9657147159..08b04d6b67 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="c4a7a626c84a28de3d2c6bfd85592bda2abad8cf5b41b2ce90b9c03904ccd3df" +CIDL_SHA256="82aa1ca1d7701b2221c442d58f912be59798258d574effcb866ffab22753cf38" MGD_LINUX_SHA256="81231b30872fa1c581aa22c101f32d11f33f335758ac1fd2653436fbc7aab93f" \ No newline at end of file From 0a6966cbfc0bafe5f93a26c480e6223390d4451d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]"
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 05:21:06 +0000 Subject: [PATCH 024/186] Update taiki-e/install-action digest to f7c663c (#4574) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`c1dd9c9` -> `f7c663c`](https://togithub.com/taiki-e/install-action/compare/c1dd9c9...f7c663c) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index c006a41f35..1805da8ad8 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@c1dd9c9e59427252db32b9ece987f4eebc3a021a # v2 + uses: taiki-e/install-action@f7c663c03b51ed0d93e9cec22a575d3f02175989 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 67cd482cd4f6f15ed3a9b42ba7eed10c57199b84 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Nov 2023 23:57:21 -0800 Subject: [PATCH 025/186] [nexus] add sled provision state (#4520) Add the notion of a sled provision state to Nexus. Currently, we will only use this to prevent new resources and regions from being provisioned to sleds. This PR includes: 1. Database updates and schema migrations. 2. Database APIs in `nexus-db-queries`. 3. An HTTP API. 4. Tests for resource and region allocation. 
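A minimal sketch of the forward-compatibility this design buys (not part of the patch itself; it assumes `nexus_types::external_api::views` and `serde_json` are in scope, and the `"some_future_state"` value is hypothetical). The HTTP API is a `PUT` to `/v1/system/hardware/sleds/{sled_id}/provision-state` whose body and response use the `SledProvisionState` view defined below; because that view carries an `Unknown` variant marked `#[serde(other)]`, an unrecognized state deserializes rather than erroring:

```rust
use nexus_types::external_api::views;

fn main() {
    // Known states serialize with snake_case names, matching the OpenAPI schema.
    let s = serde_json::to_string(&views::SledProvisionState::NonProvisionable)
        .unwrap();
    assert_eq!(s, r#""non_provisionable""#);

    // A state this client doesn't know about (hypothetical here) falls back to
    // `Unknown` instead of failing deserialization -- the "avoid API breakage"
    // property called out in views.rs below.
    let v: views::SledProvisionState =
        serde_json::from_str(r#""some_future_state""#).unwrap();
    assert_eq!(v, views::SledProvisionState::Unknown);
}
```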
--- Cargo.lock | 6 +- nexus/db-model/Cargo.toml | 1 + nexus/db-model/src/lib.rs | 9 +- .../db-model/src/queries/region_allocation.rs | 2 + nexus/db-model/src/schema.rs | 3 +- nexus/db-model/src/sled.rs | 11 +- nexus/db-model/src/sled_provision_state.rs | 58 ++++++ nexus/db-queries/src/db/datastore/mod.rs | 86 ++++++++- nexus/db-queries/src/db/datastore/sled.rs | 171 ++++++++++++++++-- .../src/db/queries/region_allocation.rs | 10 +- nexus/src/app/sled.rs | 15 ++ nexus/src/external_api/http_entrypoints.rs | 42 +++++ nexus/tests/integration_tests/endpoints.rs | 15 ++ nexus/tests/integration_tests/schema.rs | 12 +- nexus/tests/output/nexus_tags.txt | 1 + nexus/types/Cargo.toml | 1 + nexus/types/src/external_api/params.rs | 17 ++ nexus/types/src/external_api/views.rs | 27 +++ openapi/nexus.json | 127 +++++++++++++ schema/crdb/15.0.0/up1.sql | 6 + schema/crdb/15.0.0/up2.sql | 3 + schema/crdb/15.0.0/up3.sql | 5 + schema/crdb/dbinit.sql | 12 +- 23 files changed, 607 insertions(+), 33 deletions(-) create mode 100644 nexus/db-model/src/sled_provision_state.rs create mode 100644 schema/crdb/15.0.0/up1.sql create mode 100644 schema/crdb/15.0.0/up2.sql create mode 100644 schema/crdb/15.0.0/up3.sql diff --git a/Cargo.lock b/Cargo.lock index 108c8b182d..532fcde59f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1671,9 +1671,9 @@ dependencies = [ [[package]] name = "diesel_derives" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e054665eaf6d97d1e7125512bb2d35d07c73ac86cc6920174cb42d1ab697a554" +checksum = "ef8337737574f55a468005a83499da720f20c65586241ffea339db9ecdfd2b44" dependencies = [ "diesel_table_macro_syntax", "proc-macro2", @@ -3993,6 +3993,7 @@ dependencies = [ "sled-agent-client", "steno", "strum", + "thiserror", "uuid", ] @@ -4178,6 +4179,7 @@ dependencies = [ "schemars", "serde", "serde_json", + "serde_with", "steno", "strum", "uuid", diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index b7514c4806..477ce7d11f 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -26,6 +26,7 @@ serde.workspace = true serde_json.workspace = true steno.workspace = true strum.workspace = true +thiserror.workspace = true uuid.workspace = true db-macros.workspace = true diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index ac5bad26f8..43bf83fd34 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -70,6 +70,7 @@ mod silo_user; mod silo_user_password_hash; mod sled; mod sled_instance; +mod sled_provision_state; mod sled_resource; mod sled_resource_kind; mod sled_underlay_subnet_allocation; @@ -152,6 +153,7 @@ pub use silo_user::*; pub use silo_user_password_hash::*; pub use sled::*; pub use sled_instance::*; +pub use sled_provision_state::*; pub use sled_resource::*; pub use sled_resource_kind::*; pub use sled_underlay_subnet_allocation::*; @@ -287,10 +289,9 @@ macro_rules! 
impl_enum_type { Ok($model_type::$enum_item) } )* - _ => { - Err(concat!("Unrecognized enum variant for ", - stringify!{$model_type}) - .into()) + other => { + let s = concat!("Unrecognized enum variant for ", stringify!{$model_type}); + Err(format!("{}: (raw bytes: {:?})", s, other).into()) } } } diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs index 2025e79fb8..a1b9e0373a 100644 --- a/nexus/db-model/src/queries/region_allocation.rs +++ b/nexus/db-model/src/queries/region_allocation.rs @@ -23,6 +23,7 @@ // a CTE (where we want the alias name to come first). use crate::schema::dataset; +use crate::schema::sled; use crate::schema::zpool; table! { @@ -157,6 +158,7 @@ diesel::allow_tables_to_appear_in_same_query!( diesel::allow_tables_to_appear_in_same_query!( old_zpool_usage, zpool, + sled, proposed_dataset_changes, ); diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index afeac5e6cd..6527da3637 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -741,6 +741,7 @@ table! { ip -> Inet, port -> Int4, last_used_address -> Inet, + provision_state -> crate::SledProvisionStateEnum, } } @@ -1299,7 +1300,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(14, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(15, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 4c82aa5d23..0f6d1b911e 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -4,8 +4,8 @@ use super::{ByteCount, Generation, SqlU16, SqlU32}; use crate::collection::DatastoreCollectionConfig; -use crate::ipv6; use crate::schema::{physical_disk, service, sled, zpool}; +use crate::{ipv6, SledProvisionState}; use chrono::{DateTime, Utc}; use db_macros::Asset; use nexus_types::{external_api::shared, external_api::views, identity::Asset}; @@ -59,6 +59,8 @@ pub struct Sled { /// The last IP address provided to an Oxide service on this sled pub last_used_address: ipv6::Ipv6Addr, + + provision_state: SledProvisionState, } impl Sled { @@ -81,6 +83,10 @@ impl Sled { pub fn serial_number(&self) -> &str { &self.serial_number } + + pub fn provision_state(&self) -> SledProvisionState { + self.provision_state + } } impl From<Sled> for views::Sled { @@ -93,6 +99,7 @@ impl From<Sled> for views::Sled { part: sled.part_number, revision: sled.revision, }, + provision_state: sled.provision_state.into(), usable_hardware_threads: sled.usable_hardware_threads.0, usable_physical_ram: *sled.usable_physical_ram, } } @@ -188,6 +195,8 @@ impl SledUpdate { serial_number: self.serial_number, part_number: self.part_number, revision: self.revision, + // By default, sleds start as provisionable. + provision_state: SledProvisionState::Provisionable, usable_hardware_threads: self.usable_hardware_threads, usable_physical_ram: self.usable_physical_ram, reservoir_size: self.reservoir_size, diff --git a/nexus/db-model/src/sled_provision_state.rs b/nexus/db-model/src/sled_provision_state.rs new file mode 100644 index 0000000000..6cf81b9c70 --- /dev/null +++ b/nexus/db-model/src/sled_provision_state.rs @@ -0,0 +1,58 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ +use super::impl_enum_type; +use nexus_types::external_api::views; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "sled_provision_state"))] + pub struct SledProvisionStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[diesel(sql_type = SledProvisionStateEnum)] + pub enum SledProvisionState; + + // Enum values + Provisionable => b"provisionable" + NonProvisionable => b"non_provisionable" +); + +impl From<SledProvisionState> for views::SledProvisionState { + fn from(state: SledProvisionState) -> Self { + match state { + SledProvisionState::Provisionable => { + views::SledProvisionState::Provisionable + } + SledProvisionState::NonProvisionable => { + views::SledProvisionState::NonProvisionable + } + } + } +} + +impl TryFrom<views::SledProvisionState> for SledProvisionState { + type Error = UnknownSledProvisionState; + + fn try_from(state: views::SledProvisionState) -> Result<Self, Self::Error> { + match state { + views::SledProvisionState::Provisionable => { + Ok(SledProvisionState::Provisionable) + } + views::SledProvisionState::NonProvisionable => { + Ok(SledProvisionState::NonProvisionable) + } + views::SledProvisionState::Unknown => { + Err(UnknownSledProvisionState) + } + } + } +} + +/// An unknown [`views::SledProvisionState`] was encountered. +#[derive(Clone, Debug, Error)] +#[error("Unknown SledProvisionState")] +pub struct UnknownSledProvisionState; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 0612b960c9..44cd7a95b7 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -372,8 +372,8 @@ mod test { BlockSize, ComponentUpdate, ComponentUpdateIdentity, ConsoleSession, Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard, - SledSystemHardware, SledUpdate, SshKey, SystemUpdate, - UpdateableComponentType, VpcSubnet, Zpool, + SledProvisionState, SledSystemHardware, SledUpdate, SshKey, + SystemUpdate, UpdateableComponentType, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use assert_matches::assert_matches; @@ -610,6 +610,35 @@ mod test { sled_id } + // Marks a sled as non-provisionable. + async fn mark_sled_non_provisionable( + datastore: &DataStore, + opctx: &OpContext, + sled_id: Uuid, + ) { + let (authz_sled, sled) = LookupPath::new(opctx, datastore) + .sled_id(sled_id) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + println!("sled: {:?}", sled); + let old_state = datastore + .sled_set_provision_state( + &opctx, + &authz_sled, + SledProvisionState::NonProvisionable, + ) + .await + .unwrap_or_else(|error| { + panic!( + "error marking sled {sled_id} as non-provisionable: {error}" + ) + }); + // The old state should always be provisionable since that's where we + // start.
+ assert_eq!(old_state, SledProvisionState::Provisionable); + } + fn test_zpool_size() -> ByteCount { ByteCount::from_gibibytes_u32(100) } @@ -770,13 +799,24 @@ mod test { let logctx = dev::test_setup_log("test_region_allocation_strat_random"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - create_test_datasets_for_region_allocation( + let test_datasets = create_test_datasets_for_region_allocation( &opctx, datastore.clone(), + // Even though we're going to mark one sled as non-provisionable to + // test that logic, we aren't forcing the datasets to be on + // distinct sleds, so REGION_REDUNDANCY_THRESHOLD is enough. REGION_REDUNDANCY_THRESHOLD, ) .await; + let non_provisionable_dataset_id = test_datasets[0].dataset_id; + mark_sled_non_provisionable( + &datastore, + &opctx, + test_datasets[0].sled_id, + ) + .await; + // Allocate regions from the datasets for this disk. Do it a few times // for good measure. for alloc_seed in 0..10 { @@ -809,6 +849,9 @@ mod test { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); + // Dataset must not be non-provisionable. + assert_ne!(dataset.id(), non_provisionable_dataset_id); + // Must be 3 unique zpools assert!(disk_zpools.insert(dataset.pool_id)); @@ -837,12 +880,23 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - // Create a rack without enough sleds for a successful allocation when - // we require 3 distinct sleds. + // Create a rack with enough sleds for a successful allocation when we + // require 3 distinct provisionable sleds. let test_datasets = create_test_datasets_for_region_allocation( &opctx, datastore.clone(), - REGION_REDUNDANCY_THRESHOLD, + // We're going to mark one sled as non-provisionable to test that + // logic, and we *are* forcing the datasets to be on distinct + // sleds: hence threshold + 1. + REGION_REDUNDANCY_THRESHOLD + 1, + ) + .await; + + let non_provisionable_dataset_id = test_datasets[0].dataset_id; + mark_sled_non_provisionable( + &datastore, + &opctx, + test_datasets[0].sled_id, ) .await; @@ -884,6 +938,9 @@ mod test { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); + // Dataset must not be non-provisionable. + assert_ne!(dataset.id(), non_provisionable_dataset_id); + // Must be 3 unique zpools assert!(disk_zpools.insert(dataset.pool_id)); @@ -916,11 +973,22 @@ mod test { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a rack without enough sleds for a successful allocation when - // we require 3 distinct sleds. - create_test_datasets_for_region_allocation( + // we require 3 distinct provisionable sleds. + let test_datasets = create_test_datasets_for_region_allocation( &opctx, datastore.clone(), - REGION_REDUNDANCY_THRESHOLD - 1, + // Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1 + // provisionable sleds to test this failure condition. We're going + // to mark one sled as non-provisionable to test that logic, so we + // need to add 1 to that number. 
+ REGION_REDUNDANCY_THRESHOLD, + ) + .await; + + mark_sled_non_provisionable( + &datastore, + &opctx, + test_datasets[0].sled_id, + ) + .await; diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 130c36b496..406119a636 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -15,6 +15,7 @@ use crate::db::model::Sled; use crate::db::model::SledResource; use crate::db::model::SledUpdate; use crate::db::pagination::paginated; +use crate::db::update_and_check::UpdateAndCheck; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; @@ -153,6 +154,11 @@ impl DataStore { .and(sled_has_space_in_reservoir), ) .filter(sled_dsl::time_deleted.is_null()) + // Filter out sleds that are not provisionable. + .filter( + sled_dsl::provision_state + .eq(db::model::SledProvisionState::Provisionable), + ) .select(sled_dsl::id) .into_boxed(); @@ -217,6 +223,37 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; Ok(()) } + + /// Sets the provision state for this sled. + /// + /// Returns the previous state. + pub async fn sled_set_provision_state( + &self, + opctx: &OpContext, + authz_sled: &authz::Sled, + state: db::model::SledProvisionState, + ) -> Result<db::model::SledProvisionState, external::Error> { + use db::schema::sled::dsl; + + opctx.authorize(authz::Action::Modify, authz_sled).await?; + + let sled_id = authz_sled.id(); + let query = diesel::update(dsl::sled) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(sled_id)) + .filter(dsl::provision_state.ne(state)) + .set(( + dsl::provision_state.eq(state), + dsl::time_modified.eq(Utc::now()), + )) + .check_if_exists::<Sled>(sled_id); + let result = query + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(result.found.provision_state()) + } } #[cfg(test)] @@ -226,12 +263,15 @@ mod test { use crate::db::datastore::test::{ sled_baseboard_for_test, sled_system_hardware_for_test, }; + use crate::db::lookup::LookupPath; use crate::db::model::ByteCount; use crate::db::model::SqlU32; use nexus_test_utils::db::test_setup_database; + use nexus_types::identity::Asset; use omicron_common::api::external; use omicron_test_utils::dev; use std::net::{Ipv6Addr, SocketAddrV6}; + use std::num::NonZeroU32; fn rack_id() -> Uuid { Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap() } @@ -243,19 +283,9 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (_opctx, datastore) = datastore_test(&logctx, &db).await; - let sled_id = Uuid::new_v4(); - let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); - let mut sled_update = SledUpdate::new( - sled_id, - addr, - sled_baseboard_for_test(), - sled_system_hardware_for_test(), - rack_id(), - ); - let observed_sled = datastore - .sled_upsert(sled_update.clone()) - .await - .expect("Could not upsert sled during test prep"); + let mut sled_update = test_new_sled_update(); + let observed_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); assert_eq!( observed_sled.usable_hardware_threads, sled_update.usable_hardware_threads ); @@ -301,4 +331,119 @@ db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + /// Test that new reservations aren't created on non-provisionable sleds.
+ #[tokio::test] + async fn sled_reservation_create_non_provisionable() { + let logctx = + dev::test_setup_log("sled_reservation_create_non_provisionable"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let sled_update = test_new_sled_update(); + let non_provisionable_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); + + let (authz_sled, _) = LookupPath::new(&opctx, &datastore) + .sled_id(non_provisionable_sled.id()) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + + let old_state = datastore + .sled_set_provision_state( + &opctx, + &authz_sled, + db::model::SledProvisionState::NonProvisionable, + ) + .await + .unwrap(); + assert_eq!( + old_state, + db::model::SledProvisionState::Provisionable, + "a newly created sled starts as provisionable" + ); + + // This should be an error since there are no provisionable sleds. + let resources = db::model::Resources::new( + 1, + // Just require the bare non-zero amount of RAM. + ByteCount::try_from(1024).unwrap(), + ByteCount::try_from(1024).unwrap(), + ); + let constraints = db::model::SledReservationConstraints::none(); + let error = datastore + .sled_reservation_create( + &opctx, + Uuid::new_v4(), + db::model::SledResourceKind::Instance, + resources.clone(), + constraints, + ) + .await + .unwrap_err(); + assert!(matches!(error, external::Error::ServiceUnavailable { .. })); + + // Now add a provisionable sled and try again. + let sled_update = test_new_sled_update(); + let provisionable_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); + + let sleds = datastore + .sled_list(&opctx, &first_page(NonZeroU32::new(10).unwrap())) + .await + .unwrap(); + println!("sleds: {:?}", sleds); + + // Try a few times to ensure that resources never get allocated to the + // non-provisionable sled. + for _ in 0..10 { + let constraints = db::model::SledReservationConstraints::none(); + let resource = datastore + .sled_reservation_create( + &opctx, + Uuid::new_v4(), + db::model::SledResourceKind::Instance, + resources.clone(), + constraints, + ) + .await + .unwrap(); + assert_eq!( + resource.sled_id, + provisionable_sled.id(), + "resource is always allocated to the provisionable sled" + ); + + datastore + .sled_reservation_delete(&opctx, resource.id) + .await + .unwrap(); + } + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + fn test_new_sled_update() -> SledUpdate { + let sled_id = Uuid::new_v4(); + let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); + SledUpdate::new( + sled_id, + addr, + sled_baseboard_for_test(), + sled_system_hardware_for_test(), + rack_id(), + ) + } + + /// Returns pagination parameters to fetch the first page of results for a + /// paginated endpoint + fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { + DataPageParams { + marker: None, + direction: dropshot::PaginationOrder::Ascending, + limit, + } + } } diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index a080af4c37..031be92c08 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -290,6 +290,7 @@ impl CandidateZpools { seed: u128, distinct_sleds: bool, ) -> Self { + use schema::sled::dsl as sled_dsl; use schema::zpool::dsl as zpool_dsl; // Why are we using raw `diesel::dsl::sql` here? 
@@ -310,13 +311,20 @@ + diesel::dsl::sql(&zpool_size_delta.to_string())) .le(diesel::dsl::sql(zpool_dsl::total_size::NAME)); + // We need to join on the sled table to access provision_state. + let with_sled = sled_dsl::sled.on(zpool_dsl::sled_id.eq(sled_dsl::id)); let with_zpool = zpool_dsl::zpool - .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)); + .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)) + .inner_join(with_sled); + + let sled_is_provisionable = sled_dsl::provision_state + .eq(crate::db::model::SledProvisionState::Provisionable); let base_query = old_zpool_usage .query_source() .inner_join(with_zpool) .filter(it_will_fit) + .filter(sled_is_provisionable) .select((old_zpool_usage::dsl::pool_id,)); let query = if distinct_sleds { diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index c2931f1441..44efc2934e 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -8,6 +8,7 @@ use crate::internal_api::params::{ PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, SledAgentStartupInfo, SledRole, ZpoolPutRequest, }; +use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; @@ -142,6 +143,20 @@ impl super::Nexus { .await } + /// Returns the old state. + pub(crate) async fn sled_set_provision_state( + &self, + opctx: &OpContext, + sled_lookup: &lookup::Sled<'_>, + state: db::model::SledProvisionState, + ) -> Result<db::model::SledProvisionState, Error> { + let (authz_sled,) = + sled_lookup.lookup_for(authz::Action::Modify).await?; + self.db_datastore + .sled_set_provision_state(opctx, &authz_sled, state) + .await + } + // Physical disks pub(crate) async fn sled_list_physical_disks( diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 78f675c28a..f1302f4a73 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -218,6 +218,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(rack_view)?; api.register(sled_list)?; api.register(sled_view)?; + api.register(sled_set_provision_state)?; api.register(sled_instance_list)?; api.register(sled_physical_disk_list)?; api.register(physical_disk_list)?; @@ -4483,6 +4484,47 @@ async fn sled_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Set the sled's provision state. +#[endpoint { + method = PUT, + path = "/v1/system/hardware/sleds/{sled_id}/provision-state", + tags = ["system/hardware"], +}] +async fn sled_set_provision_state( + rqctx: RequestContext<Arc<ServerContext>>, + path_params: Path<params::SledPath>, + new_provision_state: TypedBody<params::SledProvisionStateParams>, +) -> Result<HttpResponseOk<params::SledProvisionStateResponse>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let path = path_params.into_inner(); + let provision_state = new_provision_state.into_inner().state; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + // Convert the external `SledProvisionState` into our internal data model.
+ let new_state = + db::model::SledProvisionState::try_from(provision_state).map_err( + |error| HttpError::for_bad_request(None, format!("{error}")), + )?; + + let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; + + let old_state = nexus + .sled_set_provision_state(&opctx, &sled_lookup, new_state) + .await?; + + let response = params::SledProvisionStateResponse { + old_state: old_state.into(), + new_state: new_state.into(), + }; + + Ok(HttpResponseOk(response)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// List instances running on a given sled #[endpoint { method = GET, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 5dfdcc151d..536b96f7ae 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -50,6 +50,12 @@ lazy_static! { format!("/v1/system/hardware/uninitialized-sleds"); pub static ref HARDWARE_SLED_URL: String = format!("/v1/system/hardware/sleds/{}", SLED_AGENT_UUID); + pub static ref HARDWARE_SLED_PROVISION_STATE_URL: String = + format!("/v1/system/hardware/sleds/{}/provision-state", SLED_AGENT_UUID); + pub static ref DEMO_SLED_PROVISION_STATE: params::SledProvisionStateParams = + params::SledProvisionStateParams { + state: nexus_types::external_api::views::SledProvisionState::NonProvisionable, + }; pub static ref HARDWARE_SWITCH_URL: String = format!("/v1/system/hardware/switches/{}", SWITCH_UUID); pub static ref HARDWARE_DISK_URL: String = @@ -1609,6 +1615,15 @@ lazy_static! { allowed_methods: vec![AllowedMethod::Get], }, + VerifyEndpoint { + url: &HARDWARE_SLED_PROVISION_STATE_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![AllowedMethod::Put( + serde_json::to_value(&*DEMO_SLED_PROVISION_STATE).unwrap() + )], + }, + VerifyEndpoint { url: "/v1/system/hardware/switches", visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 213e7f9e4f..6feafe415d 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -629,7 +629,17 @@ impl InformationSchema { self.referential_constraints, other.referential_constraints ); - similar_asserts::assert_eq!(self.statistics, other.statistics); + similar_asserts::assert_eq!( + self.statistics, + other.statistics, + "Statistics did not match. This often means that in dbinit.sql, a new \ + column was added into the middle of a table rather than to the end. \ + If that is the case:\n\n \ + \ + * Change dbinit.sql to add the column to the end of the table.\n\ + * Update nexus/db-model/src/schema.rs and the corresponding \ + Queryable/Insertable struct with the new column ordering." 
+ ); similar_asserts::assert_eq!(self.sequences, other.sequences); similar_asserts::assert_eq!(self.pg_indexes, other.pg_indexes); } diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index dd387ab979..7e57d00df2 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -120,6 +120,7 @@ rack_view GET /v1/system/hardware/racks/{rac sled_instance_list GET /v1/system/hardware/sleds/{sled_id}/instances sled_list GET /v1/system/hardware/sleds sled_physical_disk_list GET /v1/system/hardware/sleds/{sled_id}/disks +sled_set_provision_state PUT /v1/system/hardware/sleds/{sled_id}/provision-state sled_view GET /v1/system/hardware/sleds/{sled_id} switch_list GET /v1/system/hardware/switches switch_view GET /v1/system/hardware/switches/{switch_id} diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 9cb94a8484..8cbbd8626c 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -14,6 +14,7 @@ parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } serde.workspace = true serde_json.workspace = true +serde_with.workspace = true steno.workspace = true strum.workspace = true uuid.workspace = true diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a0169ae777..a5f1f3f874 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -75,6 +75,23 @@ pub struct SledSelector { pub sled: Uuid, } +/// Parameters for `sled_set_provision_state`. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct SledProvisionStateParams { + /// The provision state. + pub state: super::views::SledProvisionState, +} + +/// Response to `sled_set_provision_state`. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct SledProvisionStateResponse { + /// The old provision state. + pub old_state: super::views::SledProvisionState, + + /// The new provision state. + pub new_state: super::views::SledProvisionState, +} + pub struct SwitchSelector { /// ID of the switch pub switch: Uuid, diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 9dfe36d63b..6d02623f34 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -17,6 +17,7 @@ use omicron_common::api::external::{ }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use serde_with::rust::deserialize_ignore_any; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::net::IpAddr; @@ -286,12 +287,38 @@ pub struct Sled { pub baseboard: Baseboard, /// The rack to which this Sled is currently attached pub rack_id: Uuid, + /// The provision state of the sled. + pub provision_state: SledProvisionState, /// The number of hardware threads which can execute on this sled pub usable_hardware_threads: u32, /// Amount of RAM which may be used by the Sled's OS pub usable_physical_ram: ByteCount, } +/// The provision state of a sled. +/// +/// This controls whether new resources are going to be provisioned on this +/// sled. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] +#[serde(rename_all = "snake_case")] +pub enum SledProvisionState { + /// New resources will be provisioned on this sled. + Provisionable, + + /// New resources will not be provisioned on this sled. However, existing + /// resources will continue to be on this sled unless manually migrated + /// off. 
+ NonProvisionable, + + /// This is a state that isn't known yet. + /// + /// This is defined to avoid API breakage. + #[serde(other, deserialize_with = "deserialize_ignore_any")] + Unknown, +} + /// An operator's view of an instance running on a given sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct SledInstance { diff --git a/openapi/nexus.json b/openapi/nexus.json index 704aa393db..08e6cd7149 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -3817,6 +3817,55 @@ } } }, + "/v1/system/hardware/sleds/{sled_id}/provision-state": { + "put": { + "tags": [ + "system/hardware" + ], + "summary": "Set the sled's provision state.", + "operationId": "sled_set_provision_state", + "parameters": [ + { + "in": "path", + "name": "sled_id", + "description": "ID of the sled", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledProvisionStateParams" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledProvisionStateResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/hardware/switch-port": { "get": { "tags": [ @@ -12976,6 +13025,14 @@ "type": "string", "format": "uuid" }, + "provision_state": { + "description": "The provision state of the sled.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + }, "rack_id": { "description": "The rack to which this Sled is currently attached", "type": "string", @@ -13009,6 +13066,7 @@ "required": [ "baseboard", "id", + "provision_state", "rack_id", "time_created", "time_modified", @@ -13099,6 +13157,75 @@ "items" ] }, + "SledProvisionState": { + "description": "The provision state of a sled.\n\nThis controls whether new resources are going to be provisioned on this sled.", + "oneOf": [ + { + "description": "New resources will be provisioned on this sled.", + "type": "string", + "enum": [ + "provisionable" + ] + }, + { + "description": "New resources will not be provisioned on this sled. 
However, existing resources will continue to be on this sled unless manually migrated off.", + "type": "string", + "enum": [ + "non_provisionable" + ] + }, + { + "description": "This is a state that isn't known yet.\n\nThis is defined to avoid API breakage.", + "type": "string", + "enum": [ + "unknown" + ] + } + ] + }, + "SledProvisionStateParams": { + "description": "Parameters for `sled_set_provision_state`.", + "type": "object", + "properties": { + "state": { + "description": "The provision state.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + } + }, + "required": [ + "state" + ] + }, + "SledProvisionStateResponse": { + "description": "Response to `sled_set_provision_state`.", + "type": "object", + "properties": { + "new_state": { + "description": "The new provision state.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + }, + "old_state": { + "description": "The old provision state.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + } + }, + "required": [ + "new_state", + "old_state" + ] + }, "SledResultsPage": { "description": "A single page of results", "type": "object", diff --git a/schema/crdb/15.0.0/up1.sql b/schema/crdb/15.0.0/up1.sql new file mode 100644 index 0000000000..04baa76370 --- /dev/null +++ b/schema/crdb/15.0.0/up1.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS omicron.public.sled_provision_state AS ENUM ( + -- New resources can be provisioned onto the sled + 'provisionable', + -- New resources must not be provisioned onto the sled + 'non_provisionable' +); diff --git a/schema/crdb/15.0.0/up2.sql b/schema/crdb/15.0.0/up2.sql new file mode 100644 index 0000000000..e3ea2ba11c --- /dev/null +++ b/schema/crdb/15.0.0/up2.sql @@ -0,0 +1,3 @@ +ALTER TABLE omicron.public.sled + ADD COLUMN IF NOT EXISTS provision_state omicron.public.sled_provision_state + NOT NULL DEFAULT 'provisionable'; diff --git a/schema/crdb/15.0.0/up3.sql b/schema/crdb/15.0.0/up3.sql new file mode 100644 index 0000000000..aaa3feac20 --- /dev/null +++ b/schema/crdb/15.0.0/up3.sql @@ -0,0 +1,5 @@ +-- Drop the default column value for provision_state -- it should always be set +-- by Nexus. +ALTER TABLE omicron.public.sled + ALTER COLUMN provision_state + DROP DEFAULT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 728b084982..178c7af913 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -73,6 +73,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.rack ( * Sleds */ +CREATE TYPE IF NOT EXISTS omicron.public.sled_provision_state AS ENUM ( + -- New resources can be provisioned onto the sled + 'provisionable', + -- New resources must not be provisioned onto the sled + 'non_provisionable' +); + CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* Identity metadata (asset) */ id UUID PRIMARY KEY, @@ -104,6 +111,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* The last address allocated to an Oxide service on this sled. */ last_used_address INET NOT NULL, + /* The state of whether resources should be provisioned onto the sled */ + provision_state omicron.public.sled_provision_state NOT NULL, + -- This constraint should be upheld, even for deleted disks -- in the fleet. 
CONSTRAINT serial_part_revision_unique UNIQUE ( @@ -2997,7 +3007,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '14.0.0', NULL) + ( TRUE, NOW(), NOW(), '15.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 7f8b82e5ec266d94c5a94e0aa987f4edc81b3116 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 00:25:00 -0800 Subject: [PATCH 026/186] Update Rust crate zeroize to 1.7.0 (#4542) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 532fcde59f..a0e8361d79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9895,9 +9895,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" dependencies = [ "zeroize_derive", ] diff --git a/Cargo.toml b/Cargo.toml index 239fb453dc..78abe273e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -389,7 +389,7 @@ walkdir = "2.4" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } wicketd-client = { path = "clients/wicketd-client" } -zeroize = { version = "1.6.0", features = ["zeroize_derive", "std"] } +zeroize = { version = "1.7.0", features = ["zeroize_derive", "std"] } zip = { version = "0.6.6", default-features = false, features = ["deflate","bzip2"] } zone = { version = "0.3", default-features = false, features = ["async"] } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 7757b4ad8b..fe7c3bdc81 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -105,7 +105,7 @@ unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } uuid = { version = "1.6.1", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zeroize = { version = "1.6.0", features = ["std", "zeroize_derive"] } +zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } [build-dependencies] @@ -201,7 +201,7 @@ unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } uuid = { version = "1.6.1", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zeroize = { version = "1.6.0", features = ["std", "zeroize_derive"] } +zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } [target.x86_64-unknown-linux-gnu.dependencies] From bb7ee841d38318a3316c5749babae3112ed074a2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:15:50 -0800 Subject: [PATCH 027/186] Update Rust crate pretty-hex to 0.4.0 (#4576) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0e8361d79..6580e1de55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1791,7 +1791,7 @@ dependencies = [ "omicron-workspace-hack", "openapi-lint", "openapiv3 1.0.3", - "pretty-hex 0.3.0", + "pretty-hex 0.4.0", 
"schemars", "serde", "serde_json", @@ -5978,9 +5978,9 @@ checksum = "bc5c99d529f0d30937f6f4b8a86d988047327bb88d04d2c4afc356de74722131" [[package]] name = "pretty-hex" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5" +checksum = "23c6b968ed37d62e35b4febaba13bfa231b0b7929d68b8a94e65445a17e2d35f" [[package]] name = "pretty_assertions" diff --git a/Cargo.toml b/Cargo.toml index 78abe273e0..694cd2c8dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -288,7 +288,7 @@ petgraph = "0.6.4" postgres-protocol = "0.6.6" predicates = "3.0.4" pretty_assertions = "1.4.0" -pretty-hex = "0.3.0" +pretty-hex = "0.4.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } From a4e12168c6c418317f980c16dea7801660781d7c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 29 Nov 2023 11:57:01 -0800 Subject: [PATCH 028/186] [nexus] Make 'update_and_check' CTE explicitly request columns (#4572) Related to https://github.com/oxidecomputer/omicron/issues/4570 , but not a direct fix for it This PR removes a usage of ".\*" from a SQL query. Using ".\*" in sql queries is somewhat risky -- it makes an implicit dependency on order, and can make backwards compatibility difficult in certain circumstances. Instead, this PR provides a `ColumnWalker`, for converting a tuple of columns to an iterator, and requests the expected columns explicitly. --- nexus/db-queries/src/db/column_walker.rs | 112 ++++++++++++++++++++ nexus/db-queries/src/db/mod.rs | 1 + nexus/db-queries/src/db/update_and_check.rs | 48 +++++---- 3 files changed, 141 insertions(+), 20 deletions(-) create mode 100644 nexus/db-queries/src/db/column_walker.rs diff --git a/nexus/db-queries/src/db/column_walker.rs b/nexus/db-queries/src/db/column_walker.rs new file mode 100644 index 0000000000..64c3b450c8 --- /dev/null +++ b/nexus/db-queries/src/db/column_walker.rs @@ -0,0 +1,112 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! CTE utility for iterating over all columns in a table. + +use diesel::prelude::*; +use std::marker::PhantomData; + +/// Used to iterate over a tuple of columns ("T"). +/// +/// Diesel exposes "AllColumns" as a tuple, which is difficult to iterate over +/// -- after all, all the types are distinct. However, each of these types +/// implements "Column", so we can use a macro to provide a +/// "convertion-to-iterator" implemenation for our expected tuples. +pub(crate) struct ColumnWalker { + remaining: PhantomData, +} + +impl ColumnWalker { + pub fn new() -> Self { + Self { remaining: PhantomData } + } +} + +macro_rules! impl_column_walker { + ( $len:literal $($column:ident)+ ) => ( + impl<$($column: Column),+> IntoIterator for ColumnWalker<($($column,)+)> { + type Item = &'static str; + type IntoIter = std::array::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + [$($column::NAME,)+].into_iter() + } + } + ); +} + +// implementations for 1 - 32 columns +impl_column_walker! { 1 A } +impl_column_walker! { 2 A B } +impl_column_walker! { 3 A B C } +impl_column_walker! { 4 A B C D } +impl_column_walker! { 5 A B C D E } +impl_column_walker! { 6 A B C D E F } +impl_column_walker! 
{ 7 A B C D E F G } +impl_column_walker! { 8 A B C D E F G H } +impl_column_walker! { 9 A B C D E F G H I } +impl_column_walker! { 10 A B C D E F G H I J } +impl_column_walker! { 11 A B C D E F G H I J K } +impl_column_walker! { 12 A B C D E F G H I J K L } +impl_column_walker! { 13 A B C D E F G H I J K L M } +impl_column_walker! { 14 A B C D E F G H I J K L M N } +impl_column_walker! { 15 A B C D E F G H I J K L M N O } +impl_column_walker! { 16 A B C D E F G H I J K L M N O P } +impl_column_walker! { 17 A B C D E F G H I J K L M N O P Q } +impl_column_walker! { 18 A B C D E F G H I J K L M N O P Q R } +impl_column_walker! { 19 A B C D E F G H I J K L M N O P Q R S } +impl_column_walker! { 20 A B C D E F G H I J K L M N O P Q R S T } +impl_column_walker! { 21 A B C D E F G H I J K L M N O P Q R S T U } +impl_column_walker! { 22 A B C D E F G H I J K L M N O P Q R S T U V } +impl_column_walker! { 23 A B C D E F G H I J K L M N O P Q R S T U V W } +impl_column_walker! { 24 A B C D E F G H I J K L M N O P Q R S T U V W X } +impl_column_walker! { 25 A B C D E F G H I J K L M N O P Q R S T U V W X Y } +impl_column_walker! { 26 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z } +impl_column_walker! { 27 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 } +impl_column_walker! { 28 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 } +impl_column_walker! { 29 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 } +impl_column_walker! { 30 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 D1 } +impl_column_walker! { 31 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 D1 E1 } +impl_column_walker! { 32 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 D1 E1 F1 } + +#[cfg(test)] +mod test { + use super::*; + + table! { + test_schema.test_table (id) { + id -> Uuid, + value -> Int4, + time_deleted -> Nullable, + } + } + + // We can convert all a tables columns into an iteratable format. + #[test] + fn test_walk_table() { + let all_columns = + ColumnWalker::<::AllColumns>::new(); + + let mut iter = all_columns.into_iter(); + assert_eq!(iter.next(), Some("id")); + assert_eq!(iter.next(), Some("value")); + assert_eq!(iter.next(), Some("time_deleted")); + assert_eq!(iter.next(), None); + } + + // We can, if we want to, also make a ColumnWalker out of an arbitrary tuple + // of columns. + #[test] + fn test_walk_columns() { + let all_columns = ColumnWalker::<( + test_table::columns::id, + test_table::columns::value, + )>::new(); + + let mut iter = all_columns.into_iter(); + assert_eq!(iter.next(), Some("id")); + assert_eq!(iter.next(), Some("value")); + assert_eq!(iter.next(), None); + } +} diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index 8b7424a056..b7c7079b54 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -12,6 +12,7 @@ pub mod collection_attach; pub mod collection_detach; pub mod collection_detach_many; pub mod collection_insert; +mod column_walker; mod config; mod cte_utils; // This is marked public for use by the integration tests diff --git a/nexus/db-queries/src/db/update_and_check.rs b/nexus/db-queries/src/db/update_and_check.rs index d6bf14c083..fed79d5254 100644 --- a/nexus/db-queries/src/db/update_and_check.rs +++ b/nexus/db-queries/src/db/update_and_check.rs @@ -4,6 +4,7 @@ //! CTE implementation for "UPDATE with extended return status". 
+use super::column_walker::ColumnWalker; use super::pool::DbConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::associations::HasTable; @@ -21,7 +22,7 @@ use std::marker::PhantomData; /// allows referencing generics with names (and extending usage /// without re-stating those generic parameters everywhere). pub trait UpdateStatementExt { - type Table: QuerySource; + type Table: Table + QuerySource; type WhereClause; type Changeset; @@ -32,7 +33,7 @@ pub trait UpdateStatementExt { impl UpdateStatementExt for UpdateStatement where - T: QuerySource, + T: Table + QuerySource, { type Table = T; type WhereClause = U; @@ -201,11 +202,11 @@ where /// /// ```text /// // WITH found AS (SELECT FROM T WHERE ) -/// // updated AS (UPDATE T SET RETURNING *) +/// // updated AS (UPDATE T SET RETURNING ) /// // SELECT /// // found. /// // updated. -/// // found.* +/// // found. /// // FROM /// // found /// // LEFT JOIN @@ -217,41 +218,48 @@ impl QueryFragment for UpdateAndQueryStatement where US: UpdateStatementExt, US::Table: HasTable + Table, + ColumnWalker<<::Table as Table>::AllColumns>: + IntoIterator, PrimaryKey: diesel::Column, UpdateStatement: QueryFragment, { fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { + let primary_key = as Column>::NAME; + out.push_sql("WITH found AS ("); self.find_subquery.walk_ast(out.reborrow())?; out.push_sql("), updated AS ("); self.update_statement.walk_ast(out.reborrow())?; - // TODO: Only need primary? Or would we actually want - // to pass the returned rows back through the result? - out.push_sql(" RETURNING *) "); + out.push_sql(" RETURNING "); + out.push_identifier(primary_key)?; + out.push_sql(") "); out.push_sql("SELECT"); - let name = as Column>::NAME; out.push_sql(" found."); - out.push_identifier(name)?; + out.push_identifier(primary_key)?; out.push_sql(", updated."); - out.push_identifier(name)?; - // TODO: I'd prefer to list all columns explicitly. But how? - // The types exist within Table::AllColumns, and each one - // has a name as "::Name". - // But Table::AllColumns is a tuple, which makes iteration - // a pain. - // - // TODO: Technically, we're repeating the PK here. - out.push_sql(", found.*"); + out.push_identifier(primary_key)?; + + // List all the "found" columns explicitly. + // This admittedly repeats the primary key, but that keeps the query + // "simple" since it returns all columns in the same order as + // AllColumns. + let all_columns = ColumnWalker::< + <::Table as Table>::AllColumns, + >::new(); + for column in all_columns.into_iter() { + out.push_sql(", found."); + out.push_identifier(column)?; + } out.push_sql(" FROM found LEFT JOIN updated ON"); out.push_sql(" found."); - out.push_identifier(name)?; + out.push_identifier(primary_key)?; out.push_sql(" = "); out.push_sql("updated."); - out.push_identifier(name)?; + out.push_identifier(primary_key)?; Ok(()) } From 22a70e489db5c91f1215535463abed10aa0e9db2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 29 Nov 2023 11:58:22 -0800 Subject: [PATCH 029/186] Stop panicking when our accounting is wrong (#4568) Prefer to return a 500 error instead of panicking. Since this function is already called from a transactional context, we can rely on the rollback mechanism to "undo" the deletion. 
Fixes https://github.com/oxidecomputer/omicron/issues/3870 --- .../db/datastore/virtual_provisioning_collection.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index 83856e10c7..c5c2751723 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -124,10 +124,12 @@ impl DataStore { .get_result_async(conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - assert!( - collection.is_empty(), - "Collection deleted while non-empty: {collection:?}" - ); + + if !collection.is_empty() { + return Err(Error::internal_error(&format!( + "Collection deleted while non-empty: {collection:?}" + ))); + } Ok(()) } From f24447b0d93d339e70904fccb2f0a2c421db01e0 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Wed, 29 Nov 2023 12:03:48 -0800 Subject: [PATCH 030/186] Improve oximeter self-stat tests (#4577) Reduces the tick interval in calls to `tokio::time::advance()` to ensure all timers complete reliably. See #4566 for context. --- oximeter/collector/src/agent.rs | 51 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index f6da172909..365527ef08 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -659,6 +659,24 @@ mod tests { use tokio::time::Instant; use uuid::Uuid; + // Interval on which oximeter collects from producers in these tests. + const COLLECTION_INTERVAL: Duration = Duration::from_secs(1); + + // Interval in calls to `tokio::time::advance`. This must be sufficiently + // small relative to `COLLECTION_INTERVAL` to ensure all ticks of internal + // timers complete as expected. + const TICK_INTERVAL: Duration = Duration::from_millis(10); + + // Total number of collection attempts. + const N_COLLECTIONS: u64 = 5; + + // Period these tests wait using `tokio::time::advance()` before checking + // their test conditions. + const TEST_WAIT_PERIOD: Duration = Duration::from_millis( + COLLECTION_INTERVAL.as_millis() as u64 * N_COLLECTIONS + + COLLECTION_INTERVAL.as_millis() as u64 / 2, + ); + // Test that we count successful collections from a target correctly. #[tokio::test] async fn test_self_stat_collection_count() { @@ -692,13 +710,12 @@ mod tests { let _task = tokio::task::spawn(server); // Register the dummy producer. - let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), kind: Some(ProducerKind::Service), address, base_route: String::from("/"), - interval, + interval: COLLECTION_INTERVAL, }; collector .register_producer(endpoint) @@ -708,10 +725,8 @@ mod tests { // Step time until there has been exactly `N_COLLECTIONS` collections. tokio::time::pause(); let now = Instant::now(); - const N_COLLECTIONS: usize = 5; - let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; - while now.elapsed() < wait_for { - tokio::time::advance(interval / 10).await; + while now.elapsed() < TEST_WAIT_PERIOD { + tokio::time::advance(TICK_INTERVAL).await; } // Request the statistics from the task itself. 
@@ -729,7 +744,7 @@ mod tests { .await .expect("failed to request statistics from task"); let stats = rx.await.expect("failed to receive statistics from task"); - assert_eq!(stats.collections.datum.value(), N_COLLECTIONS as u64); + assert_eq!(stats.collections.datum.value(), N_COLLECTIONS); assert!(stats.failed_collections.is_empty()); logctx.cleanup_successful(); } @@ -751,7 +766,6 @@ mod tests { // Register a bogus producer, which is equivalent to a producer that is // unreachable. - let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), kind: Some(ProducerKind::Service), @@ -762,7 +776,7 @@ mod tests { 0, )), base_route: String::from("/"), - interval, + interval: COLLECTION_INTERVAL, }; collector .register_producer(endpoint) @@ -772,10 +786,8 @@ mod tests { // Step time until there has been exactly `N_COLLECTIONS` collections. tokio::time::pause(); let now = Instant::now(); - const N_COLLECTIONS: usize = 5; - let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; - while now.elapsed() < wait_for { - tokio::time::advance(interval / 10).await; + while now.elapsed() < TEST_WAIT_PERIOD { + tokio::time::advance(TICK_INTERVAL).await; } // Request the statistics from the task itself. @@ -801,7 +813,7 @@ mod tests { .unwrap() .datum .value(), - N_COLLECTIONS as u64 + N_COLLECTIONS, ); assert_eq!(stats.failed_collections.len(), 1); logctx.cleanup_successful(); @@ -840,13 +852,12 @@ mod tests { let _task = tokio::task::spawn(server); // Register the rather flaky producer. - let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), kind: Some(ProducerKind::Service), address, base_route: String::from("/"), - interval, + interval: COLLECTION_INTERVAL, }; collector .register_producer(endpoint) @@ -856,10 +867,8 @@ mod tests { // Step time until there has been exactly `N_COLLECTIONS` collections. tokio::time::pause(); let now = Instant::now(); - const N_COLLECTIONS: usize = 5; - let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; - while now.elapsed() < wait_for { - tokio::time::advance(interval / 10).await; + while now.elapsed() < TEST_WAIT_PERIOD { + tokio::time::advance(TICK_INTERVAL).await; } // Request the statistics from the task itself. @@ -885,7 +894,7 @@ mod tests { .unwrap() .datum .value(), - N_COLLECTIONS as u64 + N_COLLECTIONS, ); assert_eq!(stats.failed_collections.len(), 1); logctx.cleanup_successful(); From 75ccdad5cbe7213c4be70c56376dac95a424d882 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Wed, 29 Nov 2023 14:26:36 -0800 Subject: [PATCH 031/186] Make oximeter producer kind required (#4571) - Pulls in updated Dendrite, Propolis, and Crucible deps, which include the new producer kind enum in metric registration requests. From their perspective, this is still an optional parameter, but it is supplied. - Make the kind a required field in API requests. - Make the kind a required column in the database, and remove any rows with a NULL value. - Update OpenAPI documents and internal consumers to reflect the required parameter. 
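In practice, every producer now has to state its kind when registering; a representative caller-side snippet (field set as defined in `common/src/api/internal/nexus.rs`, values abbreviated from the diffs below):

    let endpoint = ProducerEndpoint {
        id: Uuid::new_v4(),
        kind: ProducerKind::Service, // previously Option<ProducerKind>
        address,
        base_route: String::from("/collect"),
        interval: Duration::from_secs(10),
    };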
--- clients/nexus-client/src/lib.rs | 2 +- clients/oximeter-client/src/lib.rs | 2 +- common/src/api/internal/nexus.rs | 2 +- nexus/db-model/src/producer_endpoint.rs | 4 ++-- nexus/db-model/src/schema.rs | 2 +- nexus/src/app/oximeter.rs | 6 ++---- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/integration_tests/oximeter.rs | 2 +- openapi/nexus-internal.json | 4 ++-- openapi/oximeter.json | 4 ++-- oximeter/collector/src/agent.rs | 6 +++--- oximeter/producer/examples/producer.rs | 2 +- package-manifest.toml | 24 +++++++++++------------ schema/crdb/15.0.0/up01.sql | 14 +++++++++++++ schema/crdb/15.0.0/up02.sql | 4 ++++ schema/crdb/dbinit.sql | 2 +- sled-agent/src/sim/disk.rs | 2 +- sled-agent/src/sled_agent.rs | 2 +- tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 6 +++--- 20 files changed, 55 insertions(+), 39 deletions(-) create mode 100644 schema/crdb/15.0.0/up01.sql create mode 100644 schema/crdb/15.0.0/up02.sql diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 6667f759e4..3ecba7e710 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -225,7 +225,7 @@ impl From<&omicron_common::api::internal::nexus::ProducerEndpoint> address: s.address.to_string(), base_route: s.base_route.clone(), id: s.id, - kind: s.kind.map(Into::into), + kind: s.kind.into(), interval: s.interval.into(), } } diff --git a/clients/oximeter-client/src/lib.rs b/clients/oximeter-client/src/lib.rs index 8a03304e06..11aa1452f8 100644 --- a/clients/oximeter-client/src/lib.rs +++ b/clients/oximeter-client/src/lib.rs @@ -43,7 +43,7 @@ impl From<&omicron_common::api::internal::nexus::ProducerEndpoint> address: s.address.to_string(), base_route: s.base_route.clone(), id: s.id, - kind: s.kind.map(Into::into), + kind: s.kind.into(), interval: s.interval.into(), } } diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 1daa85dbe7..780e60b1a2 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -103,7 +103,7 @@ pub struct ProducerEndpoint { /// A unique ID for this producer. pub id: Uuid, /// The kind of producer. - pub kind: Option<ProducerKind>, + pub kind: ProducerKind, /// The IP address and port at which `oximeter` can collect metrics from the /// producer. pub address: SocketAddr, diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 52a69e0508..f282f6f08f 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -52,7 +52,7 @@ pub struct ProducerEndpoint { #[diesel(embed)] identity: ProducerEndpointIdentity, - pub kind: Option<ProducerKind>, + pub kind: ProducerKind, pub ip: ipnetwork::IpNetwork, pub port: SqlU16, pub interval: f64, @@ -69,7 +69,7 @@ impl ProducerEndpoint { ) -> Self { Self { identity: ProducerEndpointIdentity::new(endpoint.id), - kind: endpoint.kind.map(Into::into), + kind: endpoint.kind.into(), ip: endpoint.address.ip().into(), port: endpoint.address.port().into(), base_route: endpoint.base_route.clone(), diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 6527da3637..5b97bd10a9 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -399,7 +399,7 @@ table!
{ id -> Uuid, time_created -> Timestamptz, time_modified -> Timestamptz, - kind -> Nullable<crate::ProducerKindEnum>, + kind -> crate::ProducerKindEnum, ip -> Inet, port -> Int4, interval -> Float8, diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 66f39a32b6..a168b35293 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -127,9 +127,7 @@ impl super::Nexus { for producer in producers.into_iter() { let producer_info = oximeter_client::types::ProducerEndpoint { id: producer.id(), - kind: producer - .kind - .map(|kind| nexus::ProducerKind::from(kind).into()), + kind: nexus::ProducerKind::from(producer.kind).into(), address: SocketAddr::new( producer.ip.ip(), producer.port.try_into().unwrap(), ), @@ -152,7 +150,7 @@ impl super::Nexus { pub(crate) async fn register_as_producer(&self, address: SocketAddr) { let producer_endpoint = nexus::ProducerEndpoint { id: self.id, - kind: Some(nexus::ProducerKind::Service), + kind: nexus::ProducerKind::Service, address, base_route: String::from("/metrics/collect"), interval: Duration::from_secs(10), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 1e7de6132b..52ff8910f9 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -1093,7 +1093,7 @@ pub async fn start_producer_server( let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0); let server_info = ProducerEndpoint { id, - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: producer_address, base_route: "/collect".to_string(), interval: Duration::from_secs(1), diff --git a/nexus/tests/integration_tests/oximeter.rs b/nexus/tests/integration_tests/oximeter.rs index e97f36daf4..7dc453d713 100644 --- a/nexus/tests/integration_tests/oximeter.rs +++ b/nexus/tests/integration_tests/oximeter.rs @@ -361,7 +361,7 @@ async fn test_oximeter_collector_reregistration_gets_all_assignments() { ids.insert(id); let info = ProducerEndpoint { id, - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 12345), base_route: String::from("/collect"), interval: Duration::from_secs(1), diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index c358b4109b..e0580e7c13 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4343,7 +4343,6 @@ ] }, "kind": { - "nullable": true, "description": "The kind of producer.", "allOf": [ { @@ -4356,7 +4355,8 @@ "address", "base_route", "id", - "interval" + "interval", + "kind" ] }, "ProducerKind": { diff --git a/openapi/oximeter.json b/openapi/oximeter.json index f7e534c95d..f5c78d53cd 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -212,7 +212,6 @@ ] }, "kind": { - "nullable": true, "description": "The kind of producer.", "allOf": [ { @@ -225,7 +224,8 @@ "address", "base_route", "id", - "interval" + "interval", + "kind" ] }, "ProducerEndpointResultsPage": { diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 365527ef08..4135125a48 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -712,7 +712,7 @@ mod tests { // Register the dummy producer. let endpoint = ProducerEndpoint { id: Uuid::new_v4(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address, base_route: String::from("/"), interval: COLLECTION_INTERVAL, @@ -768,7 +768,7 @@ mod tests { // Register a bogus producer, which is equivalent to a producer that is // unreachable.
let endpoint = ProducerEndpoint { id: Uuid::new_v4(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: SocketAddr::V6(SocketAddrV6::new( Ipv6Addr::LOCALHOST, 0, @@ -854,7 +854,7 @@ mod tests { // Register the rather flaky producer. let endpoint = ProducerEndpoint { id: Uuid::new_v4(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address, base_route: String::from("/"), interval: COLLECTION_INTERVAL, diff --git a/oximeter/producer/examples/producer.rs b/oximeter/producer/examples/producer.rs index baa4f57bf7..8dbe0b6ad9 100644 --- a/oximeter/producer/examples/producer.rs +++ b/oximeter/producer/examples/producer.rs @@ -125,7 +125,7 @@ async fn main() -> anyhow::Result<()> { registry.register_producer(producer).unwrap(); let server_info = ProducerEndpoint { id: registry.producer_id(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: args.address, base_route: "/collect".to_string(), interval: Duration::from_secs(10), diff --git a/package-manifest.toml b/package-manifest.toml index 26c45f0ff7..3bce4aafee 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -384,10 +384,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" +source.commit = "945f040d259ca8013d3fb26f510453da7cd7b1a6" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "897d0fd6c0b82db42256a63a13c228152e1117434afa2681f649b291e3c6f46d" +source.sha256 = "f8c23cbf89fd0bbd928d8e3db1357bbea6e6b50560e221f873da5b56ed9d7527" output.type = "zone" [package.crucible-pantry] @@ -395,10 +395,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" +source.commit = "945f040d259ca8013d3fb26f510453da7cd7b1a6" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "fe545de7ac4f15454d7827927149c5f0fc68ce9545b4f1ef96aac9ac8039805a" +source.sha256 = "a25b31c81798eb65564dbe259858fdd9715784d212d3508791b1ef0cf6d17da6" output.type = "zone" # Refer to @@ -409,10 +409,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "54398875a2125227d13827d4236dce943c019b1c" +source.commit = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "01b8563db6626f90ee3fb6d97e7921b0a680373d843c1bea7ebf46fcea4f7b28" +source.sha256 = "cd341409eb2ffc3d8bec89fd20cad61d170f89d3adf926f6104eb01f4f4da881" output.type = "zone" [package.mg-ddm-gz] @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. 
Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4" -source.sha256 = "c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd" +source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36" +source.sha256 = "dc93b671cce54e83ed55faaa267f81ba9e65abcd6714aa559d68a8783d73b1c1" output.type = "zone" output.intermediate_only = true @@ -501,8 +501,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4" -source.sha256 = "428cce1e9aa399b1b49c04e7fd0bc1cb0e3f3fae6fda96055892a42e010c9d6f" +source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36" +source.sha256 = "c34b10d47fa3eb9f9f6b3655ea4ed8a726f93399ea177efea79f5c89f2ab5a1e" output.type = "zone" output.intermediate_only = true @@ -519,8 +519,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4" -source.sha256 = "5dd3534bec5eb4f857d0bf3994b26650288f650d409eec6aaa29860a2f481c37" +source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36" +source.sha256 = "ce7065227c092ee82704f39a966b7441e3ae82d75eedb6eb281bd8b3e5873e32" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/15.0.0/up01.sql b/schema/crdb/15.0.0/up01.sql new file mode 100644 index 0000000000..f9806c5917 --- /dev/null +++ b/schema/crdb/15.0.0/up01.sql @@ -0,0 +1,14 @@ +/* + * Previous commits added the optional kind of a producer. In this version, + * we're making the value required and not nullable. We'll first delete all + * records with a NULL kind -- there should not be any, since all producers both + * in an out of tree have been updated. Nonetheless, this is safe because + * currently we're updating offline, and all producers should re-register when + * they are restarted. + * + * NOTE: Full table scans are disallowed, however we don't have an index on + * producer kind (and don't currently need one). Allow full table scans for the + * context of this one statement. + */ +SET LOCAL disallow_full_table_scans = off; +DELETE FROM omicron.public.metric_producer WHERE kind IS NULL; diff --git a/schema/crdb/15.0.0/up02.sql b/schema/crdb/15.0.0/up02.sql new file mode 100644 index 0000000000..9c1ad2ea47 --- /dev/null +++ b/schema/crdb/15.0.0/up02.sql @@ -0,0 +1,4 @@ +/* + * Next, we make the field itself required in the database. 
+ */ +ALTER TABLE IF EXISTS omicron.public.metric_producer ALTER COLUMN kind SET NOT NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 178c7af913..053bc0bcfb 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1182,7 +1182,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.metric_producer ( id UUID PRIMARY KEY, time_created TIMESTAMPTZ NOT NULL, time_modified TIMESTAMPTZ NOT NULL, - kind omicron.public.producer_kind, + kind omicron.public.producer_kind NOT NULL, ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, interval FLOAT NOT NULL, diff --git a/sled-agent/src/sim/disk.rs b/sled-agent/src/sim/disk.rs index f131fd2bff..fc388f6ce2 100644 --- a/sled-agent/src/sim/disk.rs +++ b/sled-agent/src/sim/disk.rs @@ -169,7 +169,7 @@ impl SimDisk { let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0); let server_info = ProducerEndpoint { id, - kind: Some(ProducerKind::SledAgent), + kind: ProducerKind::SledAgent, address: producer_address, base_route: "/collect".to_string(), interval: Duration::from_millis(200), diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index f5b71106cd..9f8d31b3c5 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -507,7 +507,7 @@ impl SledAgent { // Nexus. This should not block progress here. let endpoint = ProducerEndpoint { id: request.body.id, - kind: Some(ProducerKind::SledAgent), + kind: ProducerKind::SledAgent, address: sled_address.into(), base_route: String::from("/metrics/collect"), interval: crate::metrics::METRIC_COLLECTION_INTERVAL, diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index ba4b5a5722..c2dda4dbd0 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="8ff834e7d0a6adb263240edd40537f2c0768f1a4" +COMMIT="2af6adea85c62ac37e451148b84e5eb0ef005f36" SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 619a6bf287..77ee198fc5 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd" -CIDL_SHA256_LINUX_DPD="b5d829b4628759ac374106f3c56c29074b29577fd0ff72f61c3b8289fea430fe" -CIDL_SHA256_LINUX_SWADM="afc68828f54dc57b32dc1556fc588baeab12341c30e96cc0fadb49f401b4b48f" +CIDL_SHA256_ILLUMOS="dc93b671cce54e83ed55faaa267f81ba9e65abcd6714aa559d68a8783d73b1c1" +CIDL_SHA256_LINUX_DPD="b13b391a085ba6bf16fdd99774f64c9d53cd7220ad518d5839c8558fb925c40c" +CIDL_SHA256_LINUX_SWADM="6bfa4e367eb2b0be89f1588ac458026a186314597a4feb9fee6cea60101c7ebe" From 3555b5d2f4827269a61608ad012f258b74d676fb Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 29 Nov 2023 14:35:10 -0800 Subject: [PATCH 032/186] Various network fixes (#4564) --- common/src/api/internal/shared.rs | 7 +- nexus/db-model/src/schema.rs | 3 +- nexus/db-model/src/switch_port.rs | 3 + .../src/db/datastore/switch_port.rs | 97 ++++++----- nexus/src/app/rack.rs | 162 +++++++++++++++++- .../app/sagas/switch_port_settings_common.rs | 3 +- nexus/tests/integration_tests/switch_port.rs | 23 +-- nexus/types/src/external_api/params.rs | 56 +++++- openapi/bootstrap-agent.json | 5 + openapi/nexus-internal.json | 5 + openapi/nexus.json | 21 ++- openapi/sled-agent.json | 5 + openapi/wicketd.json | 5 + schema/crdb/16.0.0/up1.sql | 1 + schema/crdb/dbinit.sql | 4 +- schema/rss-sled-plan.json | 5 + 
sled-agent/src/bootstrap/early_networking.rs | 14 +- sled-agent/src/rack_setup/service.rs | 11 +- .../gimlet-standalone/config-rss.toml | 2 + smf/sled-agent/non-gimlet/config-rss.toml | 4 +- tools/generate-wicketd-api.sh | 3 + .../src/cli/rack_setup/config_template.toml | 3 + wicket/src/cli/rack_setup/config_toml.rs | 8 + wicketd/src/preflight_check/uplink.rs | 6 +- wicketd/src/rss_config.rs | 1 + 25 files changed, 367 insertions(+), 90 deletions(-) create mode 100644 schema/crdb/16.0.0/up1.sql create mode 100755 tools/generate-wicketd-api.sh diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 155fbf971b..15ab4c66ce 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -140,6 +140,8 @@ pub struct PortConfigV1 { pub uplink_port_fec: PortFec, /// BGP peers on this port pub bgp_peers: Vec<BgpPeerConfig>, + /// Whether or not to set autonegotiation + pub autoneg: bool, } impl From<UplinkConfig> for PortConfigV1 { @@ -155,6 +157,7 @@ impl From<UplinkConfig> for PortConfigV1 { uplink_port_speed: value.uplink_port_speed, uplink_port_fec: value.uplink_port_fec, bgp_peers: vec![], + autoneg: false, } } } @@ -260,7 +263,7 @@ pub enum ExternalPortDiscovery { } /// Switchport Speed options -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] +#[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum PortSpeed { #[serde(alias = "0G")] @@ -284,7 +287,7 @@ pub enum PortSpeed { } /// Switchport FEC options -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] +#[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum PortFec { Firecode, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 5b97bd10a9..7d4ae241aa 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -146,6 +146,7 @@ table! { mtu -> Int4, fec -> crate::SwitchLinkFecEnum, speed -> crate::SwitchLinkSpeedEnum, + autoneg -> Bool, } } @@ -1300,7 +1301,7 @@ table! { /// /// This should be updated whenever the schema is changed.
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(15, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(16, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs index 44588899b6..6ff8612d2f 100644 --- a/nexus/db-model/src/switch_port.rs +++ b/nexus/db-model/src/switch_port.rs @@ -355,6 +355,7 @@ pub struct SwitchPortLinkConfig { pub mtu: SqlU16, pub fec: SwitchLinkFec, pub speed: SwitchLinkSpeed, + pub autoneg: bool, } impl SwitchPortLinkConfig { @@ -365,6 +366,7 @@ impl SwitchPortLinkConfig { mtu: u16, fec: SwitchLinkFec, speed: SwitchLinkSpeed, + autoneg: bool, ) -> Self { Self { port_settings_id, @@ -372,6 +374,7 @@ impl SwitchPortLinkConfig { link_name, fec, speed, + autoneg, mtu: mtu.into(), } } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index d7319347f0..6bd4e61f70 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -234,6 +234,7 @@ impl DataStore { c.mtu, c.fec.into(), c.speed.into(), + c.autoneg, )); } result.link_lldp = @@ -304,39 +305,41 @@ impl DataStore { .await?; let mut bgp_peer_config = Vec::new(); - for (interface_name, p) in &params.bgp_peers { - use db::schema::bgp_config; - let bgp_config_id = match &p.bgp_config { - NameOrId::Id(id) => *id, - NameOrId::Name(name) => { - let name = name.to_string(); - bgp_config_dsl::bgp_config - .filter(bgp_config::time_deleted.is_null()) - .filter(bgp_config::name.eq(name)) - .select(bgp_config::id) - .limit(1) - .first_async::<Uuid>(&conn) - .await - .map_err(|_| - TxnError::CustomError( - SwitchPortSettingsCreateError::BgpConfigNotFound, - ) - )? - } - }; + for (interface_name, peer_config) in &params.bgp_peers { + for p in &peer_config.peers { + use db::schema::bgp_config; + let bgp_config_id = match &p.bgp_config { + NameOrId::Id(id) => *id, + NameOrId::Name(name) => { + let name = name.to_string(); + bgp_config_dsl::bgp_config + .filter(bgp_config::time_deleted.is_null()) + .filter(bgp_config::name.eq(name)) + .select(bgp_config::id) + .limit(1) + .first_async::<Uuid>(&conn) + .await + .map_err(|_| + TxnError::CustomError( + SwitchPortSettingsCreateError::BgpConfigNotFound, + ) + )?
+ } + }; - bgp_peer_config.push(SwitchPortBgpPeerConfig::new( - psid, - bgp_config_id, - interface_name.clone(), - p.addr.into(), - p.hold_time.into(), - p.idle_hold_time.into(), - p.delay_open.into(), - p.connect_retry.into(), - p.keepalive.into(), - )); + bgp_peer_config.push(SwitchPortBgpPeerConfig::new( + psid, + bgp_config_id, + interface_name.clone(), + p.addr.into(), + p.hold_time.into(), + p.idle_hold_time.into(), + p.delay_open.into(), + p.connect_retry.into(), + p.keepalive.into(), + )); + } } result.bgp_peers = diesel::insert_into( @@ -1152,8 +1155,8 @@ mod test { use crate::db::datastore::{datastore_test, UpdatePrecondition}; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params::{ - BgpAnnounceSetCreate, BgpConfigCreate, BgpPeerConfig, SwitchPortConfig, - SwitchPortGeometry, SwitchPortSettingsCreate, + BgpAnnounceSetCreate, BgpConfigCreate, BgpPeer, BgpPeerConfig, + SwitchPortConfig, SwitchPortGeometry, SwitchPortSettingsCreate, }; use omicron_common::api::external::{ IdentityMetadataCreateParams, Name, NameOrId, @@ -1217,19 +1220,21 @@ mod test { bgp_peers: HashMap::from([( "phy0".into(), BgpPeerConfig { - bgp_announce_set: NameOrId::Name( - "test-announce-set".parse().unwrap(), - ), - bgp_config: NameOrId::Name( - "test-bgp-config".parse().unwrap(), - ), - interface_name: "qsfp0".into(), - addr: "192.168.1.1".parse().unwrap(), - hold_time: 0, - idle_hold_time: 0, - delay_open: 0, - connect_retry: 0, - keepalive: 0, + peers: vec![BgpPeer { + bgp_announce_set: NameOrId::Name( + "test-announce-set".parse().unwrap(), + ), + bgp_config: NameOrId::Name( + "test-bgp-config".parse().unwrap(), + ), + interface_name: "qsfp0".into(), + addr: "192.168.1.1".parse().unwrap(), + hold_time: 0, + idle_hold_time: 0, + delay_open: 0, + connect_retry: 0, + keepalive: 0, + }], }, )]), addresses: HashMap::new(), diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 984ece2d0c..95283faa1c 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -23,10 +23,16 @@ use nexus_db_queries::db::lookup::LookupPath; use nexus_types::external_api::params::Address; use nexus_types::external_api::params::AddressConfig; use nexus_types::external_api::params::AddressLotBlockCreate; +use nexus_types::external_api::params::BgpAnnounceSetCreate; +use nexus_types::external_api::params::BgpAnnouncementCreate; +use nexus_types::external_api::params::BgpConfigCreate; +use nexus_types::external_api::params::BgpPeer; +use nexus_types::external_api::params::LinkConfig; +use nexus_types::external_api::params::LldpServiceConfig; use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfig; use nexus_types::external_api::params::{ - AddressLotCreate, LoopbackAddressCreate, Route, SiloCreate, + AddressLotCreate, BgpPeerConfig, LoopbackAddressCreate, Route, SiloCreate, SwitchPortSettingsCreate, }; use nexus_types::external_api::shared::Baseboard; @@ -51,8 +57,8 @@ use sled_agent_client::types::EarlyNetworkConfigBody; use sled_agent_client::types::StartSledAgentRequest; use sled_agent_client::types::StartSledAgentRequestBody; use sled_agent_client::types::{ - BgpConfig, BgpPeerConfig, EarlyNetworkConfig, PortConfigV1, - RackNetworkConfigV1, RouteConfig as SledRouteConfig, + BgpConfig, BgpPeerConfig as SledBgpPeerConfig, EarlyNetworkConfig, + PortConfigV1, RackNetworkConfigV1, RouteConfig as SledRouteConfig, }; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -406,6 +412,108 @@ impl super::Nexus { 
Error::internal_error(&format!("unable to retrieve authz_address_lot for infra address_lot: {e}")) })?; + let mut bgp_configs = HashMap::new(); + + for bgp_config in &rack_network_config.bgp { + bgp_configs.insert(bgp_config.asn, bgp_config.clone()); + + let bgp_config_name: Name = + format!("as{}", bgp_config.asn).parse().unwrap(); + + let announce_set_name: Name = + format!("as{}-announce", bgp_config.asn).parse().unwrap(); + + let address_lot_name: Name = + format!("as{}-lot", bgp_config.asn).parse().unwrap(); + + self.db_datastore + .address_lot_create( + &opctx, + &AddressLotCreate { + identity: IdentityMetadataCreateParams { + name: address_lot_name, + description: format!( + "Address lot for announce set in as {}", + bgp_config.asn + ), + }, + kind: AddressLotKind::Infra, + blocks: bgp_config + .originate + .iter() + .map(|o| AddressLotBlockCreate { + first_address: o.network().into(), + last_address: o.broadcast().into(), + }) + .collect(), + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to create address lot for BGP as {}: {}", + bgp_config.asn, e + )) + })?; + + self.db_datastore + .bgp_create_announce_set( + &opctx, + &BgpAnnounceSetCreate { + identity: IdentityMetadataCreateParams { + name: announce_set_name.clone(), + description: format!( + "Announce set for AS {}", + bgp_config.asn + ), + }, + announcement: bgp_config + .originate + .iter() + .map(|x| BgpAnnouncementCreate { + address_lot_block: NameOrId::Name( + format!("as{}", bgp_config.asn) + .parse() + .unwrap(), + ), + network: IpNetwork::from(*x).into(), + }) + .collect(), + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to create bgp announce set for as {}: {}", + bgp_config.asn, e + )) + })?; + + self.db_datastore + .bgp_config_set( + &opctx, + &BgpConfigCreate { + identity: IdentityMetadataCreateParams { + name: bgp_config_name, + description: format!( + "BGP config for AS {}", + bgp_config.asn + ), + }, + asn: bgp_config.asn, + bgp_announce_set_id: announce_set_name.into(), + vrf: None, + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to set bgp config for as {}: {}", + bgp_config.asn, e + )) + })?; + } + for (idx, uplink_config) in rack_network_config.ports.iter().enumerate() { @@ -503,6 +611,43 @@ impl super::Nexus { .routes .insert("phy0".to_string(), RouteConfig { routes }); + let peers: Vec<BgpPeer> = uplink_config + .bgp_peers + .iter() + .map(|r| BgpPeer { + bgp_announce_set: NameOrId::Name( + format!("as{}-announce", r.asn).parse().unwrap(), + ), + bgp_config: NameOrId::Name( + format!("as{}", r.asn).parse().unwrap(), + ), + interface_name: "phy0".into(), + addr: r.addr.into(), + hold_time: r.hold_time.unwrap_or(6) as u32, + idle_hold_time: r.idle_hold_time.unwrap_or(3) as u32, + delay_open: r.delay_open.unwrap_or(0) as u32, + connect_retry: r.connect_retry.unwrap_or(3) as u32, + keepalive: r.keepalive.unwrap_or(2) as u32, + }) + .collect(); + + port_settings_params + .bgp_peers + .insert("phy0".to_string(), BgpPeerConfig { peers }); + + let link = LinkConfig { + mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 + lldp: LldpServiceConfig { + enabled: false, + lldp_config: None, + }, + fec: uplink_config.uplink_port_fec.into(), + speed: uplink_config.uplink_port_speed.into(), + autoneg: uplink_config.autoneg, + }; + + port_settings_params.links.insert("phy".to_string(), link); + match self .db_datastore .switch_port_settings_create( @@ -658,7 +803,7 @@ impl super::Nexus { addresses:
info.addresses.iter().map(|a| a.address).collect(), bgp_peers: peer_info .iter() - .map(|(p, asn, addr)| BgpPeerConfig { + .map(|(p, asn, addr)| SledBgpPeerConfig { addr: *addr, asn: *asn, port: port.port_name.clone(), @@ -673,16 +818,21 @@ impl super::Nexus { port: port.port_name.clone(), uplink_port_fec: info .links - .get(0) //TODO breakout support + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 .map(|l| l.fec) .unwrap_or(SwitchLinkFec::None) .into(), uplink_port_speed: info .links - .get(0) //TODO breakout support + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 .map(|l| l.speed) .unwrap_or(SwitchLinkSpeed::Speed100G) .into(), + autoneg: info + .links + .get(0) //TODO breakout support + .map(|l| l.autoneg) + .unwrap_or(false), }; ports.push(p); diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs index b328c6d1ac..9132645782 100644 --- a/nexus/src/app/sagas/switch_port_settings_common.rs +++ b/nexus/src/app/sagas/switch_port_settings_common.rs @@ -55,7 +55,7 @@ pub(crate) fn api_to_dpd_port_settings( link_id.to_string(), LinkSettings { params: LinkCreate { - autoneg: false, + autoneg: l.autoneg, lane: Some(LinkId(0)), kr: false, fec: match l.fec { @@ -251,6 +251,7 @@ pub(crate) async fn bootstore_update( .map(|l| l.speed) .unwrap_or(SwitchLinkSpeed::Speed100G) .into(), + autoneg: settings.links.get(0).map(|l| l.autoneg).unwrap_or(false), bgp_peers: peer_info .iter() .filter_map(|(p, asn)| { diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index d163fc6b06..df4d96c6d1 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -10,7 +10,7 @@ use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params::{ Address, AddressConfig, AddressLotBlockCreate, AddressLotCreate, - BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate, + BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate, BgpPeer, BgpPeerConfig, LinkConfig, LinkFec, LinkSpeed, LldpServiceConfig, Route, RouteConfig, SwitchInterfaceConfig, SwitchInterfaceKind, SwitchPortApplySettings, SwitchPortSettingsCreate, @@ -118,6 +118,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { lldp: LldpServiceConfig { enabled: false, lldp_config: None }, fec: LinkFec::None, speed: LinkSpeed::Speed100G, + autoneg: false, }, ); // interfaces @@ -252,15 +253,17 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { settings.bgp_peers.insert( "phy0".into(), BgpPeerConfig { - bgp_config: NameOrId::Name("as47".parse().unwrap()), //TODO - bgp_announce_set: NameOrId::Name("instances".parse().unwrap()), //TODO - interface_name: "phy0".to_string(), - addr: "1.2.3.4".parse().unwrap(), - hold_time: 6, - idle_hold_time: 6, - delay_open: 0, - connect_retry: 3, - keepalive: 2, + peers: vec![BgpPeer { + bgp_config: NameOrId::Name("as47".parse().unwrap()), + bgp_announce_set: NameOrId::Name("instances".parse().unwrap()), + interface_name: "phy0".to_string(), + addr: "1.2.3.4".parse().unwrap(), + hold_time: 6, + idle_hold_time: 6, + delay_open: 0, + connect_retry: 3, + keepalive: 2, + }], }, ); let _created: SwitchPortSettingsView = NexusRequest::objects_post( diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a5f1f3f874..3303d38367 100644 --- 
a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1354,6 +1354,18 @@ pub enum LinkFec { Rs, } +impl From<omicron_common::api::internal::shared::PortFec> for LinkFec { + fn from(x: omicron_common::api::internal::shared::PortFec) -> LinkFec { + match x { + omicron_common::api::internal::shared::PortFec::Firecode => { + Self::Firecode + } + omicron_common::api::internal::shared::PortFec::None => Self::None, + omicron_common::api::internal::shared::PortFec::Rs => Self::Rs, + } + } +} + /// The speed of a link. #[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case")] @@ -1378,6 +1390,40 @@ pub enum LinkSpeed { Speed400G, } +impl From<omicron_common::api::internal::shared::PortSpeed> for LinkSpeed { + fn from(x: omicron_common::api::internal::shared::PortSpeed) -> Self { + match x { + omicron_common::api::internal::shared::PortSpeed::Speed0G => { + Self::Speed0G + } + omicron_common::api::internal::shared::PortSpeed::Speed1G => { + Self::Speed1G + } + omicron_common::api::internal::shared::PortSpeed::Speed10G => { + Self::Speed10G + } + omicron_common::api::internal::shared::PortSpeed::Speed25G => { + Self::Speed25G + } + omicron_common::api::internal::shared::PortSpeed::Speed40G => { + Self::Speed40G + } + omicron_common::api::internal::shared::PortSpeed::Speed50G => { + Self::Speed50G + } + omicron_common::api::internal::shared::PortSpeed::Speed100G => { + Self::Speed100G + } + omicron_common::api::internal::shared::PortSpeed::Speed200G => { + Self::Speed200G + } + omicron_common::api::internal::shared::PortSpeed::Speed400G => { + Self::Speed400G + } + } + } +} + /// Switch link configuration. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct LinkConfig { @@ -1392,6 +1438,9 @@ pub struct LinkConfig { /// The speed of the link. pub speed: LinkSpeed, + + /// Whether or not to set autonegotiation + pub autoneg: bool, } /// The LLDP configuration associated with a port. LLDP may be either enabled or @@ -1479,12 +1528,17 @@ pub struct BgpConfigListSelector { pub name_or_id: Option<NameOrId>, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct BgpPeerConfig { + pub peers: Vec<BgpPeer>, +} + /// A BGP peer configuration for an interface. Includes the set of announcements /// that will be advertised to the peer identified by `addr`. The `bgp_config` /// parameter is a reference to global BGP parameters. The `interface_name` /// indicates what interface the peer should be contacted on. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct BgpPeerConfig { +pub struct BgpPeer { /// The set of announcements advertised by the peer.
pub bgp_announce_set: NameOrId, diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 2c7ffbc337..efd9c05fa9 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -510,6 +510,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -555,6 +559,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index e0580e7c13..82c799b78d 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4240,6 +4240,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -4285,6 +4289,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/nexus.json b/openapi/nexus.json index 08e6cd7149..15e75f93ff 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -7865,7 +7865,7 @@ "switch" ] }, - "BgpPeerConfig": { + "BgpPeer": { "description": "A BGP peer configuration for an interface. Includes the set of announcements that will be advertised to the peer identified by `addr`. The `bgp_config` parameter is a reference to global BGP parameters. The `interface_name` indicates what interface the peer should be contacted on.", "type": "object", "properties": { @@ -7937,6 +7937,20 @@ "keepalive" ] }, + "BgpPeerConfig": { + "type": "object", + "properties": { + "peers": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpPeer" + } + } + }, + "required": [ + "peers" + ] + }, "BgpPeerState": { "description": "The current state of a BGP peer.", "oneOf": [ @@ -11938,6 +11952,10 @@ "description": "Switch link configuration.", "type": "object", "properties": { + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "fec": { "description": "The forward error correction mode of the link.", "allOf": [ @@ -11970,6 +11988,7 @@ } }, "required": [ + "autoneg", "fec", "lldp", "mtu", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ed202ddbdb..22216b9571 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5037,6 +5037,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -5082,6 +5086,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 60ad9a42df..32e3b70de2 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1545,6 +1545,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -1590,6 +1594,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/schema/crdb/16.0.0/up1.sql b/schema/crdb/16.0.0/up1.sql new file mode 100644 index 0000000000..d28d5ca4b5 --- /dev/null +++ b/schema/crdb/16.0.0/up1.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT 
false; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 053bc0bcfb..8a34c09bc1 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3000,6 +3000,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( CHECK (singleton = true) ); +ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT false; + INSERT INTO omicron.public.db_metadata ( singleton, time_created, @@ -3007,7 +3009,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '15.0.0', NULL) + ( TRUE, NOW(), NOW(), '16.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 2ce8ae3bdc..5086c38a9c 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -366,6 +366,7 @@ "type": "object", "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", @@ -381,6 +382,10 @@ "$ref": "#/definitions/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index bec309dc27..cb411a2546 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -548,23 +548,20 @@ impl<'a> EarlyNetworkSetup<'a> { let mut addrs = Vec::new(); for a in &port_config.addresses { + // TODO We're discarding the `uplink_cidr.prefix()` here and only using + // the IP address; at some point we probably need to give the full CIDR + // to dendrite? addrs.push(a.ip()); } - // TODO We're discarding the `uplink_cidr.prefix()` here and only using - // the IP address; at some point we probably need to give the full CIDR - // to dendrite? let link_settings = LinkSettings { - // TODO Allow user to configure link properties - // https://github.com/oxidecomputer/omicron/issues/3061 params: LinkCreate { - autoneg: false, - kr: false, + autoneg: port_config.autoneg, + kr: false, //NOTE: kr does not apply to user configurable links. 
fec: convert_fec(&port_config.uplink_port_fec), speed: convert_speed(&port_config.uplink_port_speed), lane: Some(LinkId(0)), }, - //addrs: vec![addr], addrs, }; dpd_port_settings.links.insert(link_id.to_string(), link_settings); @@ -866,6 +863,7 @@ mod tests { port: uplink.uplink_port, uplink_port_speed: uplink.uplink_port_speed, uplink_port_fec: uplink.uplink_port_fec, + autoneg: false, bgp_peers: vec![], }], bgp: vec![], diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 7dcbfa7045..0b1eadf464 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -598,14 +598,9 @@ impl ServiceInner { .collect(), addresses: config.addresses.clone(), switch: config.switch.into(), - uplink_port_speed: config - .uplink_port_speed - .clone() - .into(), - uplink_port_fec: config - .uplink_port_fec - .clone() - .into(), + uplink_port_speed: config.uplink_port_speed.into(), + uplink_port_fec: config.uplink_port_fec.into(), + autoneg: config.autoneg, bgp_peers: config .bgp_peers .iter() diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index 29a7a79eba..f7a93260e3 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -110,6 +110,8 @@ port = "qsfp0" uplink_port_speed = "40G" # The forward error correction mode for this port. uplink_port_fec="none" +# Do not use autonegotiation +autoneg = false # Switch to use for the uplink. For single-rack deployments this can be # "switch0" (upper slot) or "switch1" (lower slot). For single-node softnpu # and dendrite stub environments, use "switch0" diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index fea3cfa5d8..fdc81c0f8f 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -109,7 +109,9 @@ port = "qsfp0" # The speed of this port. uplink_port_speed = "40G" # The forward error correction mode for this port. -uplink_port_fec="none" +uplink_port_fec = "none" +# Do not use autonegotiation +autoneg = false # Switch to use for the uplink. For single-rack deployments this can be # "switch0" (upper slot) or "switch1" (lower slot). 
For single-node softnpu # and dendrite stub environments, use "switch0" diff --git a/tools/generate-wicketd-api.sh b/tools/generate-wicketd-api.sh new file mode 100755 index 0000000000..f1af33aecc --- /dev/null +++ b/tools/generate-wicketd-api.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +./target/debug/wicketd openapi > openapi/wicketd.json diff --git a/wicket/src/cli/rack_setup/config_template.toml b/wicket/src/cli/rack_setup/config_template.toml index 617b61fadc..2886fa01d7 100644 --- a/wicket/src/cli/rack_setup/config_template.toml +++ b/wicket/src/cli/rack_setup/config_template.toml @@ -65,6 +65,9 @@ uplink_port_speed = "" # `none`, `firecode`, or `rs` uplink_port_fec = "" +# `true` or `false` +autoneg = "" + # A list of bgp peers # { addr = "1.7.0.1", asn = 47, port = "qsfp0" } bgp_peers = [] diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 9b1a25a50e..5a8e8a560e 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -229,6 +229,12 @@ fn populate_network_table( ); _last_key = Some(property); } + uplink.insert( + "autoneg", + Item::Value(Value::Boolean(Formatted::new( + cfg.autoneg, + ))), + ); let mut routes = Array::new(); for r in &cfg.routes { @@ -449,6 +455,7 @@ mod tests { PortFec::None => InternalPortFec::None, PortFec::Rs => InternalPortFec::Rs, }, + autoneg: config.autoneg, switch: match config.switch { SwitchLocation::Switch0 => { InternalSwitchLocation::Switch0 @@ -529,6 +536,7 @@ mod tests { }], uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + autoneg: true, port: "port0".into(), switch: SwitchLocation::Switch0, }], diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index d94baf1995..25411f17a5 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -775,10 +775,8 @@ fn build_port_settings( LinkSettings { addrs, params: LinkCreate { - // TODO we should take these parameters too - // https://github.com/oxidecomputer/omicron/issues/3061 - autoneg: false, - kr: false, + autoneg: uplink.autoneg, + kr: false, //NOTE: kr does not apply to user configurable links fec, speed, lane: Some(LinkId(0)), diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index 0aaea427f3..f654597d81 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -548,6 +548,7 @@ fn validate_rack_network_config( PortFec::None => BaPortFec::None, PortFec::Rs => BaPortFec::Rs, }, + autoneg: config.autoneg, }) .collect(), bgp: config From a04f5e387d74ef445db9c217f54cead1f5668cf1 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 29 Nov 2023 18:23:01 -0800 Subject: [PATCH 033/186] fix schema collision (#4580) --- nexus/db-model/src/schema.rs | 2 +- schema/crdb/{15.0.0 => 16.0.0}/up01.sql | 0 schema/crdb/{15.0.0 => 16.0.0}/up02.sql | 0 schema/crdb/{16.0.0 => 17.0.0}/up1.sql | 0 schema/crdb/dbinit.sql | 2 +- 5 files changed, 2 insertions(+), 2 deletions(-) rename schema/crdb/{15.0.0 => 16.0.0}/up01.sql (100%) rename schema/crdb/{15.0.0 => 16.0.0}/up02.sql (100%) rename schema/crdb/{16.0.0 => 17.0.0}/up1.sql (100%) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 7d4ae241aa..be345032ac 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1301,7 +1301,7 @@ table! { /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(16, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(17, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/schema/crdb/15.0.0/up01.sql b/schema/crdb/16.0.0/up01.sql similarity index 100% rename from schema/crdb/15.0.0/up01.sql rename to schema/crdb/16.0.0/up01.sql diff --git a/schema/crdb/15.0.0/up02.sql b/schema/crdb/16.0.0/up02.sql similarity index 100% rename from schema/crdb/15.0.0/up02.sql rename to schema/crdb/16.0.0/up02.sql diff --git a/schema/crdb/16.0.0/up1.sql b/schema/crdb/17.0.0/up1.sql similarity index 100% rename from schema/crdb/16.0.0/up1.sql rename to schema/crdb/17.0.0/up1.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 8a34c09bc1..f4caa2a4e6 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3009,7 +3009,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '16.0.0', NULL) + ( TRUE, NOW(), NOW(), '17.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 25fd21b0de9b0b7eb773019bd446eea33a33d691 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 30 Nov 2023 05:21:07 +0000 Subject: [PATCH 034/186] Update taiki-e/install-action digest to 6b385b7 (#4583) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`f7c663c` -> `6b385b7`](https://togithub.com/taiki-e/install-action/compare/f7c663c...6b385b7) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 1805da8ad8..afc56f40ca 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@f7c663c03b51ed0d93e9cec22a575d3f02175989 # v2 + uses: taiki-e/install-action@6b385b7509c65e9d1b7d6b72244f7e275a7f5cef # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 9023e15bf7c407ce8e1e350de2a2e8cde4a1ac3e Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 30 Nov 2023 08:53:36 -0800 Subject: [PATCH 035/186] [nexus] Add `HostPhase1Updater` (#4548) Continues the work from #4427 and #4502 adding types to Nexus that can deliver updates to components managed by the SP, and contains a similar amount of duplication from those two. 
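At a high level, the shared machinery reduces each updater to a handful of trait methods describing the target plus one common driver. Roughly (a sketch only, modulo component-specific pre/post steps the concrete updaters still perform):

    // Sketch: SpUpdater, RotUpdater, and the new HostPhase1Updater all
    // implement SpComponentUpdater and funnel through the same
    // "start update, poll MGS until terminal status" loop.
    pub async fn update(
        &self,
        mgs_clients: &mut MgsClients,
    ) -> Result<(), SpComponentUpdateError> {
        deliver_update(self, mgs_clients).await
    }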
Non-duplication changes are mostly refactoring: * Removed component-specific error types in favor of `SpComponentUpdateError` * Extracted the "start the update and poll MGS until it's done" logic into a new `common_sp_update` module The `test_host_phase1_updater_delivers_progress` is subtly different from the RoT/SP versions of the same test, but not in a way that's particularly interesting (see the "Unlike the SP and RoT cases" comment for details). --- nexus/src/app/test_interfaces.rs | 3 +- nexus/src/app/update/common_sp_update.rs | 239 +++++++ nexus/src/app/update/host_phase1_updater.rs | 177 ++++++ nexus/src/app/update/mgs_clients.rs | 146 ----- nexus/src/app/update/mod.rs | 10 +- nexus/src/app/update/rot_updater.rs | 175 ++---- nexus/src/app/update/sp_updater.rs | 166 ++--- .../integration_tests/host_phase1_updater.rs | 584 ++++++++++++++++++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/rot_updater.rs | 46 +- nexus/tests/integration_tests/sp_updater.rs | 46 +- sp-sim/src/gimlet.rs | 18 +- sp-sim/src/lib.rs | 6 + sp-sim/src/sidecar.rs | 8 + sp-sim/src/update.rs | 44 +- 15 files changed, 1226 insertions(+), 443 deletions(-) create mode 100644 nexus/src/app/update/common_sp_update.rs create mode 100644 nexus/src/app/update/host_phase1_updater.rs create mode 100644 nexus/tests/integration_tests/host_phase1_updater.rs diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index 6161a9a1c1..581b9a89bb 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -10,10 +10,9 @@ use sled_agent_client::Client as SledAgentClient; use std::sync::Arc; use uuid::Uuid; +pub use super::update::HostPhase1Updater; pub use super::update::MgsClients; -pub use super::update::RotUpdateError; pub use super::update::RotUpdater; -pub use super::update::SpUpdateError; pub use super::update::SpUpdater; pub use super::update::UpdateProgress; pub use gateway_client::types::SpType; diff --git a/nexus/src/app/update/common_sp_update.rs b/nexus/src/app/update/common_sp_update.rs new file mode 100644 index 0000000000..69a5b132a2 --- /dev/null +++ b/nexus/src/app/update/common_sp_update.rs @@ -0,0 +1,239 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Module containing implementation details shared amongst all MGS-to-SP-driven +//! updates. + +use super::MgsClients; +use super::UpdateProgress; +use gateway_client::types::SpType; +use gateway_client::types::SpUpdateStatus; +use slog::Logger; +use std::time::Duration; +use tokio::sync::watch; +use uuid::Uuid; + +type GatewayClientError = gateway_client::Error<gateway_client::types::Error>; + +/// Error type returned when an update to a component managed by the SP fails. +/// +/// Note that the SP manages itself, as well, so "SP component" here includes +/// the SP.
+#[derive(Debug, thiserror::Error)]
+pub enum SpComponentUpdateError {
+    #[error("error communicating with MGS")]
+    MgsCommunication(#[from] GatewayClientError),
+    #[error("different update is now preparing ({0})")]
+    DifferentUpdatePreparing(Uuid),
+    #[error("different update is now in progress ({0})")]
+    DifferentUpdateInProgress(Uuid),
+    #[error("different update is now complete ({0})")]
+    DifferentUpdateComplete(Uuid),
+    #[error("different update is now aborted ({0})")]
+    DifferentUpdateAborted(Uuid),
+    #[error("different update failed ({0})")]
+    DifferentUpdateFailed(Uuid),
+    #[error("update status lost (did the SP reset?)")]
+    UpdateStatusLost,
+    #[error("update was aborted")]
+    UpdateAborted,
+    #[error("update failed (error code {0})")]
+    UpdateFailedWithCode(u32),
+    #[error("update failed (error message {0})")]
+    UpdateFailedWithMessage(String),
+}
+
+pub(super) trait SpComponentUpdater {
+    /// The target component.
+    ///
+    /// Should be produced via `SpComponent::const_as_str()`.
+    fn component(&self) -> &'static str;
+
+    /// The type of the target SP.
+    fn target_sp_type(&self) -> SpType;
+
+    /// The slot number of the target SP.
+    fn target_sp_slot(&self) -> u32;
+
+    /// The target firmware slot for the component.
+    fn firmware_slot(&self) -> u16;
+
+    /// The ID of this update.
+    fn update_id(&self) -> Uuid;
+
+    /// The update payload data to send to MGS.
+    // TODO-performance This has to be convertible into a `reqwest::Body`, so we
+    // return an owned Vec<u8>. That requires all our implementors to clone the
+    // data at least once; maybe we should use `Bytes` instead (which is cheap to
+    // clone and also convertible into a reqwest::Body)?
+    fn update_data(&self) -> Vec<u8>;
+
+    /// The sending half of the watch channel to report update progress.
+    fn progress(&self) -> &watch::Sender<Option<UpdateProgress>>;
+
+    /// Logger to use while performing this update.
+    fn logger(&self) -> &Logger;
+}
+
+pub(super) async fn deliver_update(
+    updater: &(dyn SpComponentUpdater + Send + Sync),
+    mgs_clients: &mut MgsClients,
+) -> Result<(), SpComponentUpdateError> {
+    // How frequently do we poll MGS for the update progress?
+    const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3);
+
+    // Start the update.
+    mgs_clients
+        .try_all_serially(updater.logger(), |client| async move {
+            client
+                .sp_component_update(
+                    updater.target_sp_type(),
+                    updater.target_sp_slot(),
+                    updater.component(),
+                    updater.firmware_slot(),
+                    &updater.update_id(),
+                    reqwest::Body::from(updater.update_data()),
+                )
+                .await?;
+            updater.progress().send_replace(Some(UpdateProgress::Started));
+            info!(
+                updater.logger(), "update started";
+                "mgs_addr" => client.baseurl(),
+            );
+            Ok(())
+        })
+        .await?;
+
+    // Wait for the update to complete.
+    loop {
+        let status = mgs_clients
+            .try_all_serially(updater.logger(), |client| async move {
+                let update_status = client
+                    .sp_component_update_status(
+                        updater.target_sp_type(),
+                        updater.target_sp_slot(),
+                        updater.component(),
+                    )
+                    .await?;
+
+                debug!(
+                    updater.logger(), "got update status";
+                    "mgs_addr" => client.baseurl(),
+                    "status" => ?update_status,
+                );
+
+                Ok(update_status)
+            })
+            .await?;
+
+        if status_is_complete(
+            status.into_inner(),
+            updater.update_id(),
+            updater.progress(),
+            updater.logger(),
+        )?
{
+            updater.progress().send_replace(Some(UpdateProgress::InProgress {
+                progress: Some(1.0),
+            }));
+            return Ok(());
+        }
+
+        tokio::time::sleep(STATUS_POLL_INTERVAL).await;
+    }
+}
+
+fn status_is_complete(
+    status: SpUpdateStatus,
+    update_id: Uuid,
+    progress_tx: &watch::Sender<Option<UpdateProgress>>,
+    log: &Logger,
+) -> Result<bool, SpComponentUpdateError> {
+    match status {
+        // For `Preparing` and `InProgress`, we could check the progress
+        // information returned by these steps and try to check that
+        // we're still _making_ progress, but every Nexus instance needs
+        // to do that anyway in case we (or the MGS instance delivering
+        // the update) crash, so we'll omit that check here. Instead, we
+        // just sleep and we'll poll again shortly.
+        SpUpdateStatus::Preparing { id, progress } => {
+            if id == update_id {
+                let progress = progress.and_then(|progress| {
+                    if progress.current > progress.total {
+                        warn!(
+                            log, "nonsense preparing progress";
+                            "current" => progress.current,
+                            "total" => progress.total,
+                        );
+                        None
+                    } else if progress.total == 0 {
+                        None
+                    } else {
+                        Some(
+                            f64::from(progress.current)
+                                / f64::from(progress.total),
+                        )
+                    }
+                });
+                progress_tx
+                    .send_replace(Some(UpdateProgress::Preparing { progress }));
+                Ok(false)
+            } else {
+                Err(SpComponentUpdateError::DifferentUpdatePreparing(id))
+            }
+        }
+        SpUpdateStatus::InProgress { id, bytes_received, total_bytes } => {
+            if id == update_id {
+                let progress = if bytes_received > total_bytes {
+                    warn!(
+                        log, "nonsense update progress";
+                        "bytes_received" => bytes_received,
+                        "total_bytes" => total_bytes,
+                    );
+                    None
+                } else if total_bytes == 0 {
+                    None
+                } else {
+                    Some(f64::from(bytes_received) / f64::from(total_bytes))
+                };
+                progress_tx.send_replace(Some(UpdateProgress::InProgress {
+                    progress,
+                }));
+                Ok(false)
+            } else {
+                Err(SpComponentUpdateError::DifferentUpdateInProgress(id))
+            }
+        }
+        SpUpdateStatus::Complete { id } => {
+            if id == update_id {
+                Ok(true)
+            } else {
+                Err(SpComponentUpdateError::DifferentUpdateComplete(id))
+            }
+        }
+        SpUpdateStatus::None => Err(SpComponentUpdateError::UpdateStatusLost),
+        SpUpdateStatus::Aborted { id } => {
+            if id == update_id {
+                Err(SpComponentUpdateError::UpdateAborted)
+            } else {
+                Err(SpComponentUpdateError::DifferentUpdateAborted(id))
+            }
+        }
+        SpUpdateStatus::Failed { code, id } => {
+            if id == update_id {
+                Err(SpComponentUpdateError::UpdateFailedWithCode(code))
+            } else {
+                Err(SpComponentUpdateError::DifferentUpdateFailed(id))
+            }
+        }
+        SpUpdateStatus::RotError { id, message } => {
+            if id == update_id {
+                Err(SpComponentUpdateError::UpdateFailedWithMessage(format!(
+                    "rot error: {message}"
+                )))
+            } else {
+                Err(SpComponentUpdateError::DifferentUpdateFailed(id))
+            }
+        }
+    }
+}
diff --git a/nexus/src/app/update/host_phase1_updater.rs b/nexus/src/app/update/host_phase1_updater.rs
new file mode 100644
index 0000000000..fb013d0ffe
--- /dev/null
+++ b/nexus/src/app/update/host_phase1_updater.rs
@@ -0,0 +1,177 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Module containing types for updating host OS phase1 images via MGS.
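[Aside, not part of the patch: the delivery flow that `deliver_update` and `status_is_complete` implement above reduces to a small poll loop. The sketch below is illustrative only; `fetch_status` is an invented stand-in for "query MGS and translate the status", and the generic error type plays the role of `SpComponentUpdateError`.]

// A minimal sketch of the poll-until-complete shape used by `deliver_update`.
// `fetch_status` returns Ok(true) once the update is complete, Ok(false) if it
// is still preparing or in progress, and Err for any failure or mismatched
// update ID.
async fn poll_until_complete<F, Fut, E>(
    mut fetch_status: F,
    poll_interval: std::time::Duration,
) -> Result<(), E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<bool, E>>,
{
    loop {
        if fetch_status().await? {
            return Ok(());
        }
        tokio::time::sleep(poll_interval).await;
    }
}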
+
+use super::common_sp_update::deliver_update;
+use super::common_sp_update::SpComponentUpdater;
+use super::MgsClients;
+use super::SpComponentUpdateError;
+use super::UpdateProgress;
+use gateway_client::types::SpComponentFirmwareSlot;
+use gateway_client::types::SpType;
+use gateway_client::SpComponent;
+use slog::Logger;
+use tokio::sync::watch;
+use uuid::Uuid;
+
+type GatewayClientError = gateway_client::Error<gateway_client::types::Error>;
+
+pub struct HostPhase1Updater {
+    log: Logger,
+    progress: watch::Sender<Option<UpdateProgress>>,
+    sp_type: SpType,
+    sp_slot: u32,
+    target_host_slot: u16,
+    update_id: Uuid,
+    // TODO-clarity maybe a newtype for this? TBD how we get this from
+    // wherever it's stored, which might give us a stronger type already.
+    phase1_data: Vec<u8>,
+}
+
+impl HostPhase1Updater {
+    pub fn new(
+        sp_type: SpType,
+        sp_slot: u32,
+        target_host_slot: u16,
+        update_id: Uuid,
+        phase1_data: Vec<u8>,
+        log: &Logger,
+    ) -> Self {
+        let log = log.new(slog::o!(
+            "component" => "HostPhase1Updater",
+            "sp_type" => format!("{sp_type:?}"),
+            "sp_slot" => sp_slot,
+            "target_host_slot" => target_host_slot,
+            "update_id" => format!("{update_id}"),
+        ));
+        let progress = watch::Sender::new(None);
+        Self {
+            log,
+            progress,
+            sp_type,
+            sp_slot,
+            target_host_slot,
+            update_id,
+            phase1_data,
+        }
+    }
+
+    pub fn progress_watcher(&self) -> watch::Receiver<Option<UpdateProgress>> {
+        self.progress.subscribe()
+    }
+
+    /// Drive this host phase 1 update to completion (or failure).
+    ///
+    /// Only one MGS instance is required to drive an update; however, if
+    /// multiple MGS instances are available and passed to this method and an
+    /// error occurs communicating with one instance, `HostPhase1Updater` will
+    /// try the remaining instances before failing.
+    pub async fn update(
+        mut self,
+        mgs_clients: &mut MgsClients,
+    ) -> Result<(), SpComponentUpdateError> {
+        // The async block below wants a `&self` reference, but we take `self`
+        // for API clarity (to start a new update, the caller should construct a
+        // new instance of the updater). Create a `&self` ref that we use
+        // through the remainder of this method.
+        let me = &self;
+
+        // Prior to delivering the update, ensure the correct target slot is
+        // activated.
+        //
+        // TODO-correctness Should we be doing this, or should a higher level
+        // executor set this up before calling us?
+        mgs_clients
+            .try_all_serially(&self.log, |client| async move {
+                me.mark_target_slot_active(&client).await
+            })
+            .await?;
+
+        // Deliver and drive the update to completion
+        deliver_update(&mut self, mgs_clients).await?;
+
+        // Unlike SP and RoT updates, we have nothing to do after delivery of
+        // the update completes; signal to any watchers that we're done.
+        self.progress.send_replace(Some(UpdateProgress::Complete));
+
+        // wait for any progress watchers to be dropped before we return;
+        // otherwise, they'll get `RecvError`s when trying to check the current
+        // status
+        self.progress.closed().await;
+
+        Ok(())
+    }
+
+    async fn mark_target_slot_active(
+        &self,
+        client: &gateway_client::Client,
+    ) -> Result<(), GatewayClientError> {
+        // TODO-correctness Should we always persist this choice?
+        let persist = true;
+
+        let slot = self.firmware_slot();
+
+        // TODO-correctness Until
+        // https://github.com/oxidecomputer/hubris/issues/1172 is fixed, the
+        // host must be in A2 for this operation to succeed. After it is fixed,
+        // there will still be a window while a host is booting where this
+        // operation can fail. How do we handle this?
+        client
+            .sp_component_active_slot_set(
+                self.sp_type,
+                self.sp_slot,
+                self.component(),
+                persist,
+                &SpComponentFirmwareSlot { slot },
+            )
+            .await?;
+
+        // TODO-correctness Should we send some kind of update to
+        // `self.progress`? We haven't actually started delivering an update
+        // yet, but it seems weird to give no indication that we have
+        // successfully (potentially) modified the state of the target sled.
+
+        info!(
+            self.log, "host phase1 target slot marked active";
+            "mgs_addr" => client.baseurl(),
+        );
+
+        Ok(())
+    }
+}
+
+impl SpComponentUpdater for HostPhase1Updater {
+    fn component(&self) -> &'static str {
+        SpComponent::HOST_CPU_BOOT_FLASH.const_as_str()
+    }
+
+    fn target_sp_type(&self) -> SpType {
+        self.sp_type
+    }
+
+    fn target_sp_slot(&self) -> u32 {
+        self.sp_slot
+    }
+
+    fn firmware_slot(&self) -> u16 {
+        self.target_host_slot
+    }
+
+    fn update_id(&self) -> Uuid {
+        self.update_id
+    }
+
+    fn update_data(&self) -> Vec<u8> {
+        self.phase1_data.clone()
+    }
+
+    fn progress(&self) -> &watch::Sender<Option<UpdateProgress>> {
+        &self.progress
+    }
+
+    fn logger(&self) -> &Logger {
+        &self.log
+    }
+}
diff --git a/nexus/src/app/update/mgs_clients.rs b/nexus/src/app/update/mgs_clients.rs
index 5915505829..4b200a1819 100644
--- a/nexus/src/app/update/mgs_clients.rs
+++ b/nexus/src/app/update/mgs_clients.rs
@@ -5,53 +5,14 @@
 //! Module providing support for handling failover between multiple MGS clients
 
 use futures::Future;
-use gateway_client::types::SpType;
-use gateway_client::types::SpUpdateStatus;
 use gateway_client::Client;
 use slog::Logger;
 use std::collections::VecDeque;
 use std::sync::Arc;
-use uuid::Uuid;
 
 pub(super) type GatewayClientError =
     gateway_client::Error<gateway_client::types::Error>;
 
-pub(super) enum PollUpdateStatus {
-    Preparing { progress: Option<f64> },
-    InProgress { progress: Option<f64> },
-    Complete,
-}
-
-#[derive(Debug, thiserror::Error)]
-pub enum UpdateStatusError {
-    #[error("different update is now preparing ({0})")]
-    DifferentUpdatePreparing(Uuid),
-    #[error("different update is now in progress ({0})")]
-    DifferentUpdateInProgress(Uuid),
-    #[error("different update is now complete ({0})")]
-    DifferentUpdateComplete(Uuid),
-    #[error("different update is now aborted ({0})")]
-    DifferentUpdateAborted(Uuid),
-    #[error("different update failed ({0})")]
-    DifferentUpdateFailed(Uuid),
-    #[error("update status lost (did the SP reset?)")]
-    UpdateStatusLost,
-    #[error("update was aborted")]
-    UpdateAborted,
-    #[error("update failed (error code {0})")]
-    UpdateFailedWithCode(u32),
-    #[error("update failed (error message {0})")]
-    UpdateFailedWithMessage(String),
-}
-
-#[derive(Debug, thiserror::Error)]
-pub(super) enum PollUpdateStatusError {
-    #[error(transparent)]
-    StatusError(#[from] UpdateStatusError),
-    #[error(transparent)]
-    ClientError(#[from] GatewayClientError),
-}
-
 #[derive(Debug, Clone)]
 pub struct MgsClients {
     clients: VecDeque<Arc<Client>>,
@@ -130,111 +91,4 @@ impl MgsClients {
         // errors. Return the error from the last MGS we tried.
         Err(GatewayClientError::CommunicationError(last_err.unwrap()))
     }
-
-    /// Poll for the status of an expected-to-be-in-progress update.
- pub(super) async fn poll_update_status( - &mut self, - sp_type: SpType, - sp_slot: u32, - component: &'static str, - update_id: Uuid, - log: &Logger, - ) -> Result { - let update_status = self - .try_all_serially(log, |client| async move { - let update_status = client - .sp_component_update_status(sp_type, sp_slot, component) - .await?; - - debug!( - log, "got update status"; - "mgs_addr" => client.baseurl(), - "status" => ?update_status, - ); - - Ok(update_status) - }) - .await? - .into_inner(); - - match update_status { - SpUpdateStatus::Preparing { id, progress } => { - if id == update_id { - let progress = progress.and_then(|progress| { - if progress.current > progress.total { - warn!( - log, "nonsense preparing progress"; - "current" => progress.current, - "total" => progress.total, - ); - None - } else if progress.total == 0 { - None - } else { - Some( - f64::from(progress.current) - / f64::from(progress.total), - ) - } - }); - Ok(PollUpdateStatus::Preparing { progress }) - } else { - Err(UpdateStatusError::DifferentUpdatePreparing(id).into()) - } - } - SpUpdateStatus::InProgress { id, bytes_received, total_bytes } => { - if id == update_id { - let progress = if bytes_received > total_bytes { - warn!( - log, "nonsense update progress"; - "bytes_received" => bytes_received, - "total_bytes" => total_bytes, - ); - None - } else if total_bytes == 0 { - None - } else { - Some(f64::from(bytes_received) / f64::from(total_bytes)) - }; - Ok(PollUpdateStatus::InProgress { progress }) - } else { - Err(UpdateStatusError::DifferentUpdateInProgress(id).into()) - } - } - SpUpdateStatus::Complete { id } => { - if id == update_id { - Ok(PollUpdateStatus::Complete) - } else { - Err(UpdateStatusError::DifferentUpdateComplete(id).into()) - } - } - SpUpdateStatus::None => { - Err(UpdateStatusError::UpdateStatusLost.into()) - } - SpUpdateStatus::Aborted { id } => { - if id == update_id { - Err(UpdateStatusError::UpdateAborted.into()) - } else { - Err(UpdateStatusError::DifferentUpdateAborted(id).into()) - } - } - SpUpdateStatus::Failed { code, id } => { - if id == update_id { - Err(UpdateStatusError::UpdateFailedWithCode(code).into()) - } else { - Err(UpdateStatusError::DifferentUpdateFailed(id).into()) - } - } - SpUpdateStatus::RotError { id, message } => { - if id == update_id { - Err(UpdateStatusError::UpdateFailedWithMessage(format!( - "rot error: {message}" - )) - .into()) - } else { - Err(UpdateStatusError::DifferentUpdateFailed(id).into()) - } - } - } - } } diff --git a/nexus/src/app/update/mod.rs b/nexus/src/app/update/mod.rs index 7d5c642822..5075e421ae 100644 --- a/nexus/src/app/update/mod.rs +++ b/nexus/src/app/update/mod.rs @@ -26,13 +26,17 @@ use std::path::Path; use tokio::io::AsyncWriteExt; use uuid::Uuid; +mod common_sp_update; +mod host_phase1_updater; mod mgs_clients; mod rot_updater; mod sp_updater; -pub use mgs_clients::{MgsClients, UpdateStatusError}; -pub use rot_updater::{RotUpdateError, RotUpdater}; -pub use sp_updater::{SpUpdateError, SpUpdater}; +pub use common_sp_update::SpComponentUpdateError; +pub use host_phase1_updater::HostPhase1Updater; +pub use mgs_clients::MgsClients; +pub use rot_updater::RotUpdater; +pub use sp_updater::SpUpdater; #[derive(Debug, PartialEq, Clone)] pub enum UpdateProgress { diff --git a/nexus/src/app/update/rot_updater.rs b/nexus/src/app/update/rot_updater.rs index d7d21e3b3a..12126a7de9 100644 --- a/nexus/src/app/update/rot_updater.rs +++ b/nexus/src/app/update/rot_updater.rs @@ -4,40 +4,21 @@ //! Module containing types for updating RoTs via MGS. 
-use super::mgs_clients::PollUpdateStatusError; +use super::common_sp_update::deliver_update; +use super::common_sp_update::SpComponentUpdater; use super::MgsClients; +use super::SpComponentUpdateError; use super::UpdateProgress; -use super::UpdateStatusError; -use crate::app::update::mgs_clients::PollUpdateStatus; use gateway_client::types::RotSlot; use gateway_client::types::SpComponentFirmwareSlot; use gateway_client::types::SpType; use gateway_client::SpComponent; use slog::Logger; -use std::time::Duration; use tokio::sync::watch; use uuid::Uuid; type GatewayClientError = gateway_client::Error; -#[derive(Debug, thiserror::Error)] -pub enum RotUpdateError { - #[error("error communicating with MGS")] - MgsCommunication(#[from] GatewayClientError), - - #[error("failed checking update status: {0}")] - PollUpdateStatus(#[from] UpdateStatusError), -} - -impl From for RotUpdateError { - fn from(err: PollUpdateStatusError) -> Self { - match err { - PollUpdateStatusError::StatusError(err) => err.into(), - PollUpdateStatusError::ClientError(err) => err.into(), - } - } -} - pub struct RotUpdater { log: Logger, progress: watch::Sender>, @@ -89,9 +70,14 @@ impl RotUpdater { /// error occurs communicating with one instance, `RotUpdater` will try the /// remaining instances before failing. pub async fn update( - self, - mut mgs_clients: MgsClients, - ) -> Result<(), RotUpdateError> { + mut self, + mgs_clients: &mut MgsClients, + ) -> Result<(), SpComponentUpdateError> { + // Deliver and drive the update to "completion" (which isn't really + // complete for the RoT, since we still have to do the steps below after + // the delivery of the update completes). + deliver_update(&mut self, mgs_clients).await?; + // The async blocks below want `&self` references, but we take `self` // for API clarity (to start a new update, the caller should construct a // new updater). Create a `&self` ref that we use through the remainder @@ -100,23 +86,13 @@ impl RotUpdater { mgs_clients .try_all_serially(&self.log, |client| async move { - me.start_update_one_mgs(&client).await - }) - .await?; - - // `wait_for_update_completion` uses `try_all_mgs_clients` internally, - // so we don't wrap it here. - me.wait_for_update_completion(&mut mgs_clients).await?; - - mgs_clients - .try_all_serially(&self.log, |client| async move { - me.mark_target_slot_active_one_mgs(&client).await + me.mark_target_slot_active(&client).await }) .await?; mgs_clients .try_all_serially(&self.log, |client| async move { - me.finalize_update_via_reset_one_mgs(&client).await + me.finalize_update_via_reset(&client).await }) .await?; @@ -128,82 +104,7 @@ impl RotUpdater { Ok(()) } - async fn start_update_one_mgs( - &self, - client: &gateway_client::Client, - ) -> Result<(), GatewayClientError> { - let firmware_slot = self.target_rot_slot.as_u16(); - - // Start the update. - client - .sp_component_update( - self.sp_type, - self.sp_slot, - SpComponent::ROT.const_as_str(), - firmware_slot, - &self.update_id, - reqwest::Body::from(self.rot_hubris_archive.clone()), - ) - .await?; - - self.progress.send_replace(Some(UpdateProgress::Started)); - - info!( - self.log, "RoT update started"; - "mgs_addr" => client.baseurl(), - ); - - Ok(()) - } - - async fn wait_for_update_completion( - &self, - mgs_clients: &mut MgsClients, - ) -> Result<(), RotUpdateError> { - // How frequently do we poll MGS for the update progress? 
- const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3); - - loop { - let status = mgs_clients - .poll_update_status( - self.sp_type, - self.sp_slot, - SpComponent::ROT.const_as_str(), - self.update_id, - &self.log, - ) - .await?; - - // For `Preparing` and `InProgress`, we could check the progress - // information returned by these steps and try to check that - // we're still _making_ progress, but every Nexus instance needs - // to do that anyway in case we (or the MGS instance delivering - // the update) crash, so we'll omit that check here. Instead, we - // just sleep and we'll poll again shortly. - match status { - PollUpdateStatus::Preparing { progress } => { - self.progress.send_replace(Some( - UpdateProgress::Preparing { progress }, - )); - } - PollUpdateStatus::InProgress { progress } => { - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress }, - )); - } - PollUpdateStatus::Complete => { - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress: Some(1.0) }, - )); - return Ok(()); - } - } - - tokio::time::sleep(STATUS_POLL_INTERVAL).await; - } - } - - async fn mark_target_slot_active_one_mgs( + async fn mark_target_slot_active( &self, client: &gateway_client::Client, ) -> Result<(), GatewayClientError> { @@ -211,13 +112,13 @@ impl RotUpdater { // tell it to persist our choice. let persist = true; - let slot = self.target_rot_slot.as_u16(); + let slot = self.firmware_slot(); client .sp_component_active_slot_set( self.sp_type, self.sp_slot, - SpComponent::ROT.const_as_str(), + self.component(), persist, &SpComponentFirmwareSlot { slot }, ) @@ -236,16 +137,12 @@ impl RotUpdater { Ok(()) } - async fn finalize_update_via_reset_one_mgs( + async fn finalize_update_via_reset( &self, client: &gateway_client::Client, ) -> Result<(), GatewayClientError> { client - .sp_component_reset( - self.sp_type, - self.sp_slot, - SpComponent::ROT.const_as_str(), - ) + .sp_component_reset(self.sp_type, self.sp_slot, self.component()) .await?; self.progress.send_replace(Some(UpdateProgress::Complete)); @@ -258,15 +155,39 @@ impl RotUpdater { } } -trait RotSlotAsU16 { - fn as_u16(&self) -> u16; -} +impl SpComponentUpdater for RotUpdater { + fn component(&self) -> &'static str { + SpComponent::ROT.const_as_str() + } + + fn target_sp_type(&self) -> SpType { + self.sp_type + } -impl RotSlotAsU16 for RotSlot { - fn as_u16(&self) -> u16 { - match self { + fn target_sp_slot(&self) -> u32 { + self.sp_slot + } + + fn firmware_slot(&self) -> u16 { + match self.target_rot_slot { RotSlot::A => 0, RotSlot::B => 1, } } + + fn update_id(&self) -> Uuid { + self.update_id + } + + fn update_data(&self) -> Vec { + self.rot_hubris_archive.clone() + } + + fn progress(&self) -> &watch::Sender> { + &self.progress + } + + fn logger(&self) -> &Logger { + &self.log + } } diff --git a/nexus/src/app/update/sp_updater.rs b/nexus/src/app/update/sp_updater.rs index 419a733441..2a6ddc6de6 100644 --- a/nexus/src/app/update/sp_updater.rs +++ b/nexus/src/app/update/sp_updater.rs @@ -4,39 +4,19 @@ //! Module containing types for updating SPs via MGS. 
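[Aside, not part of the patch: the `SpComponentUpdater` impl for `RotUpdater` above shows the pattern every updater follows; an implementor only supplies accessors and reuses `deliver_update` for the heavy lifting. The sketch below assumes the trait and `UpdateProgress` from `common_sp_update.rs` earlier in this patch; `HypotheticalUpdater` and its fields are invented names for illustration.]

use gateway_client::types::SpType;
use gateway_client::SpComponent;
use slog::Logger;
use tokio::sync::watch;
use uuid::Uuid;

struct HypotheticalUpdater {
    log: Logger,
    progress: watch::Sender<Option<UpdateProgress>>,
    sp_type: SpType,
    sp_slot: u32,
    update_id: Uuid,
    data: Vec<u8>,
}

impl SpComponentUpdater for HypotheticalUpdater {
    fn component(&self) -> &'static str {
        // A real implementor picks one of the `SpComponent` constants.
        SpComponent::SP_ITSELF.const_as_str()
    }
    fn target_sp_type(&self) -> SpType {
        self.sp_type
    }
    fn target_sp_slot(&self) -> u32 {
        self.sp_slot
    }
    fn firmware_slot(&self) -> u16 {
        0
    }
    fn update_id(&self) -> Uuid {
        self.update_id
    }
    fn update_data(&self) -> Vec<u8> {
        self.data.clone()
    }
    fn progress(&self) -> &watch::Sender<Option<UpdateProgress>> {
        &self.progress
    }
    fn logger(&self) -> &Logger {
        &self.log
    }
}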
-use crate::app::update::mgs_clients::PollUpdateStatus; - -use super::mgs_clients::PollUpdateStatusError; +use super::common_sp_update::deliver_update; +use super::common_sp_update::SpComponentUpdater; use super::MgsClients; +use super::SpComponentUpdateError; use super::UpdateProgress; -use super::UpdateStatusError; use gateway_client::types::SpType; use gateway_client::SpComponent; use slog::Logger; -use std::time::Duration; use tokio::sync::watch; use uuid::Uuid; type GatewayClientError = gateway_client::Error; -#[derive(Debug, thiserror::Error)] -pub enum SpUpdateError { - #[error("error communicating with MGS")] - MgsCommunication(#[from] GatewayClientError), - - #[error("failed checking update status: {0}")] - PollUpdateStatus(#[from] UpdateStatusError), -} - -impl From for SpUpdateError { - fn from(err: PollUpdateStatusError) -> Self { - match err { - PollUpdateStatusError::StatusError(err) => err.into(), - PollUpdateStatusError::ClientError(err) => err.into(), - } - } -} - pub struct SpUpdater { log: Logger, progress: watch::Sender>, @@ -77,10 +57,15 @@ impl SpUpdater { /// error occurs communicating with one instance, `SpUpdater` will try the /// remaining instances before failing. pub async fn update( - self, - mut mgs_clients: MgsClients, - ) -> Result<(), SpUpdateError> { - // The async blocks below want `&self` references, but we take `self` + mut self, + mgs_clients: &mut MgsClients, + ) -> Result<(), SpComponentUpdateError> { + // Deliver and drive the update to "completion" (which isn't really + // complete for the SP, since we still have to reset it after the + // delivery of the update completes). + deliver_update(&mut self, mgs_clients).await?; + + // The async block below wants a `&self` reference, but we take `self` // for API clarity (to start a new SP update, the caller should // construct a new `SpUpdater`). Create a `&self` ref that we use // through the remainder of this method. @@ -88,17 +73,7 @@ impl SpUpdater { mgs_clients .try_all_serially(&self.log, |client| async move { - me.start_update_one_mgs(&client).await - }) - .await?; - - // `wait_for_update_completion` uses `try_all_mgs_clients` internally, - // so we don't wrap it here. - me.wait_for_update_completion(&mut mgs_clients).await?; - - mgs_clients - .try_all_serially(&self.log, |client| async move { - me.finalize_update_via_reset_one_mgs(&client).await + me.finalize_update_via_reset(&client).await }) .await?; @@ -110,102 +85,57 @@ impl SpUpdater { Ok(()) } - async fn start_update_one_mgs( + async fn finalize_update_via_reset( &self, client: &gateway_client::Client, ) -> Result<(), GatewayClientError> { - // The SP has two firmware slots, but they're aren't individually - // labled. We always request an update to slot 0, which means "the - // inactive slot". - let firmware_slot = 0; - - // Start the update. client - .sp_component_update( - self.sp_type, - self.sp_slot, - SpComponent::SP_ITSELF.const_as_str(), - firmware_slot, - &self.update_id, - reqwest::Body::from(self.sp_hubris_archive.clone()), - ) + .sp_component_reset(self.sp_type, self.sp_slot, self.component()) .await?; - self.progress.send_replace(Some(UpdateProgress::Started)); - + self.progress.send_replace(Some(UpdateProgress::Complete)); info!( - self.log, "SP update started"; + self.log, "SP update complete"; "mgs_addr" => client.baseurl(), ); Ok(()) } +} - async fn wait_for_update_completion( - &self, - mgs_clients: &mut MgsClients, - ) -> Result<(), SpUpdateError> { - // How frequently do we poll MGS for the update progress? 
-        const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3);
-
-        loop {
-            let status = mgs_clients
-                .poll_update_status(
-                    self.sp_type,
-                    self.sp_slot,
-                    SpComponent::SP_ITSELF.const_as_str(),
-                    self.update_id,
-                    &self.log,
-                )
-                .await?;
-
-            // For `Preparing` and `InProgress`, we could check the progress
-            // information returned by these steps and try to check that
-            // we're still _making_ progress, but every Nexus instance needs
-            // to do that anyway in case we (or the MGS instance delivering
-            // the update) crash, so we'll omit that check here. Instead, we
-            // just sleep and we'll poll again shortly.
-            match status {
-                PollUpdateStatus::Preparing { progress } => {
-                    self.progress.send_replace(Some(
-                        UpdateProgress::Preparing { progress },
-                    ));
-                }
-                PollUpdateStatus::InProgress { progress } => {
-                    self.progress.send_replace(Some(
-                        UpdateProgress::InProgress { progress },
-                    ));
-                }
-                PollUpdateStatus::Complete => {
-                    self.progress.send_replace(Some(
-                        UpdateProgress::InProgress { progress: Some(1.0) },
-                    ));
-                    return Ok(());
-                }
-            }
-
-            tokio::time::sleep(STATUS_POLL_INTERVAL).await;
-        }
+impl SpComponentUpdater for SpUpdater {
+    fn component(&self) -> &'static str {
+        SpComponent::SP_ITSELF.const_as_str()
     }
 
-    async fn finalize_update_via_reset_one_mgs(
-        &self,
-        client: &gateway_client::Client,
-    ) -> Result<(), GatewayClientError> {
-        client
-            .sp_component_reset(
-                self.sp_type,
-                self.sp_slot,
-                SpComponent::SP_ITSELF.const_as_str(),
-            )
-            .await?;
+    fn target_sp_type(&self) -> SpType {
+        self.sp_type
+    }
 
-        self.progress.send_replace(Some(UpdateProgress::Complete));
-        info!(
-            self.log, "SP update complete";
-            "mgs_addr" => client.baseurl(),
-        );
+    fn target_sp_slot(&self) -> u32 {
+        self.sp_slot
+    }
 
-        Ok(())
+    fn firmware_slot(&self) -> u16 {
+        // The SP has two firmware slots, but they aren't individually
+        // labeled. We always request an update to slot 0, which means "the
+        // inactive slot".
+        0
+    }
+
+    fn update_id(&self) -> Uuid {
+        self.update_id
+    }
+
+    fn update_data(&self) -> Vec<u8> {
+        self.sp_hubris_archive.clone()
+    }
+
+    fn progress(&self) -> &watch::Sender<Option<UpdateProgress>> {
+        &self.progress
+    }
+
+    fn logger(&self) -> &Logger {
+        &self.log
     }
 }
diff --git a/nexus/tests/integration_tests/host_phase1_updater.rs b/nexus/tests/integration_tests/host_phase1_updater.rs
new file mode 100644
index 0000000000..01d546636e
--- /dev/null
+++ b/nexus/tests/integration_tests/host_phase1_updater.rs
@@ -0,0 +1,584 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Tests `HostPhase1Updater`'s delivery of updates to host phase 1 flash via
+//! MGS to SP.
+
+use gateway_client::types::SpType;
+use gateway_messages::{SpPort, UpdateInProgressStatus, UpdateStatus};
+use gateway_test_utils::setup as mgs_setup;
+use omicron_nexus::app::test_interfaces::{
+    HostPhase1Updater, MgsClients, UpdateProgress,
+};
+use rand::RngCore;
+use sp_sim::SimulatedSp;
+use std::mem;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::io::AsyncWriteExt;
+use tokio::net::TcpListener;
+use tokio::net::TcpStream;
+use tokio::sync::mpsc;
+use uuid::Uuid;
+
+fn make_fake_host_phase1_image() -> Vec<u8> {
+    let mut image = vec![0; 128];
+    rand::thread_rng().fill_bytes(&mut image);
+    image
+}
+
+#[tokio::test]
+async fn test_host_phase1_updater_updates_sled() {
+    // Start MGS + Sim SP.
+ let mgstestctx = mgs_setup::test_setup( + "test_host_phase1_updater_updates_sled", + SpPort::One, + ) + .await; + + // Configure an MGS client. + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); + + for target_host_slot in [0, 1] { + // Configure and instantiate an `HostPhase1Updater`. + let sp_type = SpType::Sled; + let sp_slot = 0; + let update_id = Uuid::new_v4(); + let phase1_data = make_fake_host_phase1_image(); + + let host_phase1_updater = HostPhase1Updater::new( + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data.clone(), + &mgstestctx.logctx.log, + ); + + // Run the update. + host_phase1_updater + .update(&mut mgs_clients) + .await + .expect("update failed"); + + // Ensure the SP received the complete update. + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_host_phase1_update_data(target_host_slot) + .await + .expect("simulated host phase1 did not receive an update"); + + assert_eq!( + phase1_data.as_slice(), + &*last_update_image, + "simulated host phase1 update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + phase1_data.len(), + ); + } + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_host_phase1_updater_remembers_successful_mgs_instance() { + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_host_phase1_updater_remembers_successful_mgs_instance", + SpPort::One, + ) + .await; + + // Also start a local TCP server that we will claim is an MGS instance, but + // it will close connections immediately after accepting them. This will + // allow us to count how many connections it receives, while simultaneously + // causing errors in the HostPhase1Updater when it attempts to use this + // "MGS". + let (failing_mgs_task, failing_mgs_addr, failing_mgs_conn_counter) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let conn_count = Arc::new(AtomicUsize::new(0)); + + let task = { + let conn_count = Arc::clone(&conn_count); + tokio::spawn(async move { + loop { + let (mut stream, _peer) = socket.accept().await.unwrap(); + conn_count.fetch_add(1, Ordering::SeqCst); + stream.shutdown().await.unwrap(); + } + }) + }; + + (task, addr, conn_count) + }; + + // Order the MGS clients such that the bogus MGS that immediately closes + // connections comes first. `HostPhase1Updater` should remember that the + // second MGS instance succeeds, and only send subsequent requests to it: we + // should only see a single attempted connection to the bogus MGS, even + // though delivering an update requires a bare minimum of three requests + // (start the update, query the status, reset the SP) and often more (if + // repeated queries are required to wait for completion). 
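[Aside, not part of the patch: the "remembers the successful MGS instance" behavior exercised here comes from `MgsClients::try_all_serially`, which keeps the most recently successful client at the front of its deque. The sketch below is a simplified illustration, not the actual `MgsClients` implementation; the types and the `op` callback are stand-ins, and it assumes a non-empty client list.]

use std::collections::VecDeque;
use std::future::Future;

async fn try_all_serially<C, T, E, F, Fut>(
    clients: &mut VecDeque<C>,
    mut op: F,
) -> Result<T, E>
where
    C: Clone,
    F: FnMut(C) -> Fut,
    Fut: Future<Output = Result<T, E>>,
{
    let mut last_err = None;
    for _ in 0..clients.len() {
        let client = clients.front().expect("non-empty client list").clone();
        match op(client).await {
            // Success leaves this client at the front, so the next request
            // starts with the instance that most recently worked.
            Ok(value) => return Ok(value),
            Err(err) => {
                last_err = Some(err);
                // Demote the failing client and try the next one.
                clients.rotate_left(1);
            }
        }
    }
    Err(last_err.expect("tried at least one client"))
}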
+ let mut mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new( + &format!("http://{failing_mgs_addr}"), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), + ), + gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + ), + ]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let target_host_slot = 0; + let update_id = Uuid::new_v4(); + let phase1_data = make_fake_host_phase1_image(); + + let host_phase1_updater = HostPhase1Updater::new( + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data.clone(), + &mgstestctx.logctx.log, + ); + + host_phase1_updater.update(&mut mgs_clients).await.expect("update failed"); + + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_host_phase1_update_data(target_host_slot) + .await + .expect("simulated host phase1 did not receive an update"); + + assert_eq!( + phase1_data.as_slice(), + &*last_update_image, + "simulated host phase1 update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + phase1_data.len(), + ); + + // Check that our bogus MGS only received a single connection attempt. + // (After HostPhase1Updater failed to talk to this instance, it should have + // fallen back to the valid one for all further requests.) + assert_eq!( + failing_mgs_conn_counter.load(Ordering::SeqCst), + 1, + "bogus MGS instance didn't receive the expected number of connections" + ); + failing_mgs_task.abort(); + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_host_phase1_updater_switches_mgs_instances_on_failure() { + enum MgsProxy { + One(TcpStream), + Two(TcpStream), + } + + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_host_phase1_updater_switches_mgs_instances_on_failure", + SpPort::One, + ) + .await; + let mgs_bind_addr = mgstestctx.client.bind_address; + + let spawn_mgs_proxy_task = |mut stream: TcpStream| { + tokio::spawn(async move { + let mut mgs_stream = TcpStream::connect(mgs_bind_addr) + .await + .expect("failed to connect to MGS"); + tokio::io::copy_bidirectional(&mut stream, &mut mgs_stream) + .await + .expect("failed to proxy connection to MGS"); + }) + }; + + // Start two MGS proxy tasks; when each receives an incoming TCP connection, + // it forwards that `TcpStream` along the `mgs_proxy_connections` channel + // along with a tag of which proxy it is. We'll use this below to flip flop + // between MGS "instances" (really these two proxies). + let (mgs_proxy_connections_tx, mut mgs_proxy_connections_rx) = + mpsc::unbounded_channel(); + let (mgs_proxy_one_task, mgs_proxy_one_addr) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let mgs_proxy_connections_tx = mgs_proxy_connections_tx.clone(); + let task = tokio::spawn(async move { + loop { + let (stream, _peer) = socket.accept().await.unwrap(); + mgs_proxy_connections_tx.send(MgsProxy::One(stream)).unwrap(); + } + }); + (task, addr) + }; + let (mgs_proxy_two_task, mgs_proxy_two_addr) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let task = tokio::spawn(async move { + loop { + let (stream, _peer) = socket.accept().await.unwrap(); + mgs_proxy_connections_tx.send(MgsProxy::Two(stream)).unwrap(); + } + }); + (task, addr) + }; + + // Disable connection pooling so each request gets a new TCP connection. 
+ let client = + reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); + + // Configure two MGS clients pointed at our two proxy tasks. + let mut mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new_with_client( + &format!("http://{mgs_proxy_one_addr}"), + client.clone(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), + ), + gateway_client::Client::new_with_client( + &format!("http://{mgs_proxy_two_addr}"), + client, + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient2")), + ), + ]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let target_host_slot = 0; + let update_id = Uuid::new_v4(); + let phase1_data = make_fake_host_phase1_image(); + + let host_phase1_updater = HostPhase1Updater::new( + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data.clone(), + &mgstestctx.logctx.log, + ); + + // Spawn the actual update task. + let mut update_task = tokio::spawn(async move { + host_phase1_updater.update(&mut mgs_clients).await + }); + + // Loop over incoming requests. We expect this sequence: + // + // 1. Connection arrives on the first proxy + // 2. We spawn a task to service that request, and set `should_swap` + // 3. Connection arrives on the first proxy + // 4. We drop that connection, flip `expected_proxy`, and clear + // `should_swap` + // 5. Connection arrives on the second proxy + // 6. We spawn a task to service that request, and set `should_swap` + // 7. Connection arrives on the second proxy + // 8. We drop that connection, flip `expected_proxy`, and clear + // `should_swap` + // + // ... repeat until the update is complete. + let mut expected_proxy = 0; + let mut proxy_one_count = 0; + let mut proxy_two_count = 0; + let mut total_requests_handled = 0; + let mut should_swap = false; + loop { + tokio::select! { + Some(proxy_stream) = mgs_proxy_connections_rx.recv() => { + let stream = match proxy_stream { + MgsProxy::One(stream) => { + assert_eq!(expected_proxy, 0); + proxy_one_count += 1; + stream + } + MgsProxy::Two(stream) => { + assert_eq!(expected_proxy, 1); + proxy_two_count += 1; + stream + } + }; + + // Should we trigger `HostPhase1Updater` to swap to the other + // MGS (proxy)? If so, do that by dropping this connection + // (which will cause a client failure) and note that we expect + // the next incoming request to come on the other proxy. + if should_swap { + mem::drop(stream); + expected_proxy ^= 1; + should_swap = false; + } else { + // Otherwise, handle this connection. + total_requests_handled += 1; + spawn_mgs_proxy_task(stream); + should_swap = true; + } + } + + result = &mut update_task => { + match result { + Ok(Ok(())) => { + mgs_proxy_one_task.abort(); + mgs_proxy_two_task.abort(); + break; + } + Ok(Err(err)) => panic!("update failed: {err}"), + Err(err) => panic!("update task panicked: {err}"), + } + } + } + } + + // A host flash update requires a minimum of 3 requests to MGS: set the + // active flash slot, post the update, and check the status. There may be + // more requests if the update is not yet complete when the status is + // checked, but we can just check that each of our proxies received at least + // 2 incoming requests; based on our outline above, if we got the minimum of + // 3 requests, it would look like this: + // + // 1. POST update -> first proxy (success) + // 2. GET status -> first proxy (fail) + // 3. GET status retry -> second proxy (success) + // 4. POST reset -> second proxy (fail) + // 5. 
POST reset -> first proxy (success)
+    //
+    // This pattern would repeat if multiple status requests were required, so
+    // we always expect the first proxy to see exactly one more connection
+    // attempt than the second (because it went first before they started
+    // swapping), and the two together should see a total of one less than
+    // double the number of successful requests required.
+    assert!(total_requests_handled >= 3);
+    assert_eq!(proxy_one_count, proxy_two_count + 1);
+    assert_eq!(
+        (proxy_one_count + proxy_two_count + 1) / 2,
+        total_requests_handled
+    );
+
+    let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize]
+        .last_host_phase1_update_data(target_host_slot)
+        .await
+        .expect("simulated host phase1 did not receive an update");
+
+    assert_eq!(
+        phase1_data.as_slice(),
+        &*last_update_image,
+        "simulated host phase1 update contents (len {}) \
+        do not match test generated fake image (len {})",
+        last_update_image.len(),
+        phase1_data.len(),
+    );
+
+    mgstestctx.teardown().await;
+}
+
+#[tokio::test]
+async fn test_host_phase1_updater_delivers_progress() {
+    // Start MGS + Sim SP.
+    let mgstestctx = mgs_setup::test_setup(
+        "test_host_phase1_updater_delivers_progress",
+        SpPort::One,
+    )
+    .await;
+
+    // Configure an MGS client.
+    let mut mgs_clients =
+        MgsClients::from_clients([gateway_client::Client::new(
+            &mgstestctx.client.url("/").to_string(),
+            mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")),
+        )]);
+
+    let sp_type = SpType::Sled;
+    let sp_slot = 0;
+    let target_host_slot = 0;
+    let update_id = Uuid::new_v4();
+    let phase1_data = make_fake_host_phase1_image();
+
+    let host_phase1_updater = HostPhase1Updater::new(
+        sp_type,
+        sp_slot,
+        target_host_slot,
+        update_id,
+        phase1_data.clone(),
+        &mgstestctx.logctx.log,
+    );
+
+    let phase1_data_len = phase1_data.len() as u32;
+
+    // Subscribe to update progress, and check that there is no status yet; we
+    // haven't started the update.
+    let mut progress = host_phase1_updater.progress_watcher();
+    assert_eq!(*progress.borrow_and_update(), None);
+
+    // Install a semaphore on the requests our target SP will receive so we can
+    // inspect progress messages without racing.
+    let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize];
+    let sp_accept_sema = target_sp.install_udp_accept_semaphore().await;
+    let mut sp_responses = target_sp.responses_sent_count().unwrap();
+
+    // Spawn the update on a background task so we can watch `progress` as it is
+    // applied.
+    let do_update_task = tokio::spawn(async move {
+        host_phase1_updater.update(&mut mgs_clients).await
+    });
+
+    // Allow the SP to respond to 2 messages: the message to activate the target
+    // flash slot and the "prepare update" message that triggers the start of an
+    // update, then ensure we see the "started" progress.
+    sp_accept_sema.send(2).unwrap();
+    progress.changed().await.unwrap();
+    assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Started));
+
+    // Ensure our simulated SP is in the state we expect: it's prepared for an
+    // update but has not yet received any data.
+    assert_eq!(
+        target_sp.current_update_status().await,
+        UpdateStatus::InProgress(UpdateInProgressStatus {
+            id: update_id.into(),
+            bytes_received: 0,
+            total_size: phase1_data_len,
+        })
+    );
+
+    // Record the number of responses the SP has sent; we'll use
+    // `sp_responses.changed()` in the loop below, and want to mark whatever
+    // value this watch channel currently has as seen.
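[Aside, not part of the patch: the test's synchronization hinges on `tokio::sync::watch` semantics: `borrow_and_update()` marks the channel's current value as seen, so a subsequent `changed().await` wakes only for a genuinely new value. A self-contained sketch of that pattern, unrelated to MGS:]

use tokio::sync::watch;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = watch::channel(0u32);
    tx.send_replace(1); // the value changes while we're not watching

    // Mark whatever the channel currently holds (1) as seen; `changed()` will
    // now wait for a new value instead of returning immediately.
    rx.borrow_and_update();

    tokio::spawn(async move {
        tx.send_replace(2);
    });

    // Wakes only once the spawned task publishes a new value.
    rx.changed().await.unwrap();
    assert_eq!(*rx.borrow_and_update(), 2);
}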
+    sp_responses.borrow_and_update();
+
+    // At this point, there are two clients racing each other to talk to our
+    // simulated SP:
+    //
+    // 1. MGS is trying to deliver the update
+    // 2. `host_phase1_updater` is trying to poll (via MGS) for update status
+    //
+    // and we want to ensure that we see any relevant progress reports from
+    // `host_phase1_updater`. We'll let one MGS -> SP message through at a time
+    // (waiting until our SP has responded by waiting for a change to
+    // `sp_responses`) then check its update state: if it changed, the packet we
+    // let through was data from MGS; otherwise, it was a status request from
+    // `host_phase1_updater`.
+    //
+    // This loop will continue until either:
+    //
+    // 1. We see an `UpdateStatus::InProgress` message indicating 100% delivery,
+    //    at which point we break out of the loop
+    // 2. We time out waiting for the previous step (by timing out for either
+    //    the SP to process a request or `host_phase1_updater` to realize
+    //    there's been progress), at which point we panic and fail this test.
+    let mut prev_bytes_received = 0;
+    let mut expect_progress_change = false;
+    loop {
+        // Allow the SP to accept and respond to a single UDP packet.
+        sp_accept_sema.send(1).unwrap();
+
+        // Wait until the SP has sent a response, with a safety rail that we
+        // haven't screwed up our untangle-the-race logic: if we don't see the
+        // SP process any new messages after several seconds, our test is
+        // broken, so fail.
+        tokio::time::timeout(Duration::from_secs(10), sp_responses.changed())
+            .await
+            .expect("timeout waiting for SP response count to change")
+            .expect("sp response count sender dropped");
+
+        // Inspect the SP's in-memory update state; we expect only `InProgress`
+        // or `Complete`, and in either case we note whether we expect to see
+        // status changes from `host_phase1_updater`.
+        match target_sp.current_update_status().await {
+            UpdateStatus::InProgress(sp_progress) => {
+                if sp_progress.bytes_received > prev_bytes_received {
+                    prev_bytes_received = sp_progress.bytes_received;
+                    expect_progress_change = true;
+                    continue;
+                }
+            }
+            UpdateStatus::Complete(_) => {
+                if prev_bytes_received < phase1_data_len {
+                    break;
+                }
+            }
+            status @ (UpdateStatus::None
+            | UpdateStatus::Preparing(_)
+            | UpdateStatus::SpUpdateAuxFlashChckScan { .. }
+            | UpdateStatus::Aborted(_)
+            | UpdateStatus::Failed { .. }
+            | UpdateStatus::RotError { .. }) => {
+                panic!("unexpected status {status:?}");
+            }
+        }
+
+        // If we get here, the most recent packet did _not_ change the SP's
+        // internal update state, so it was a status request from
+        // `host_phase1_updater`. If we expect the updater to see new progress,
+        // wait for that change here.
+        if expect_progress_change {
+            // Safety rail that we haven't screwed up our untangle-the-race
+            // logic: if we don't see new progress after several seconds, our
+            // test is broken, so fail.
+            tokio::time::timeout(Duration::from_secs(10), progress.changed())
+                .await
+                .expect("progress timeout")
+                .expect("progress watch sender dropped");
+            let status = progress.borrow_and_update().clone().unwrap();
+            expect_progress_change = false;
+
+            assert!(
+                matches!(status, UpdateProgress::InProgress { .. }),
+                "unexpected progress status {status:?}"
+            );
+        }
+    }
+
+    // We know the SP has received a complete update, but `HostPhase1Updater`
+    // may still need to request status to realize that; release the socket
+    // semaphore so the SP can respond.
+ sp_accept_sema.send(usize::MAX).unwrap(); + + // Unlike the SP and RoT cases, there are no MGS/SP steps in between the + // update completing and `HostPhase1Updater` sending + // `UpdateProgress::Complete`. Therefore, it's a race whether we'll see + // some number of `InProgress` status before `Complete`, but we should + // quickly move to `Complete`. + loop { + tokio::time::timeout(Duration::from_secs(10), progress.changed()) + .await + .expect("progress timeout") + .expect("progress watch sender dropped"); + let status = progress.borrow_and_update().clone().unwrap(); + match status { + UpdateProgress::Complete => break, + UpdateProgress::InProgress { .. } => continue, + _ => panic!("unexpected progress status {status:?}"), + } + } + + // drop our progress receiver so `do_update_task` can complete + mem::drop(progress); + + do_update_task.await.expect("update task panicked").expect("update failed"); + + let last_update_image = target_sp + .last_host_phase1_update_data(target_host_slot) + .await + .expect("simulated host phase1 did not receive an update"); + + assert_eq!( + phase1_data.as_slice(), + &*last_update_image, + "simulated host phase1 update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + phase1_data.len(), + ); + + mgstestctx.teardown().await; +} diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 87c5c74f0f..4d7b41cfa8 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -12,6 +12,7 @@ mod commands; mod console_api; mod device_auth; mod disks; +mod host_phase1_updater; mod images; mod initialization; mod instances; diff --git a/nexus/tests/integration_tests/rot_updater.rs b/nexus/tests/integration_tests/rot_updater.rs index 750f9571d0..2e6d65f8b1 100644 --- a/nexus/tests/integration_tests/rot_updater.rs +++ b/nexus/tests/integration_tests/rot_updater.rs @@ -45,10 +45,11 @@ async fn test_rot_updater_updates_sled() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); // Configure and instantiate an `RotUpdater`. let sp_type = SpType::Sled; @@ -67,7 +68,7 @@ async fn test_rot_updater_updates_sled() { ); // Run the update. - rot_updater.update(mgs_clients).await.expect("update failed"); + rot_updater.update(&mut mgs_clients).await.expect("update failed"); // Ensure the RoT received the complete update. let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] @@ -97,10 +98,11 @@ async fn test_rot_updater_updates_switch() { .await; // Configure an MGS client. 
- let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); let sp_type = SpType::Switch; let sp_slot = 0; @@ -117,7 +119,7 @@ async fn test_rot_updater_updates_switch() { &mgstestctx.logctx.log, ); - rot_updater.update(mgs_clients).await.expect("update failed"); + rot_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.sidecars[sp_slot as usize] .last_rot_update_data() @@ -177,7 +179,7 @@ async fn test_rot_updater_remembers_successful_mgs_instance() { // delivering an update requires a bare minimum of three requests (start the // update, query the status, reset the RoT) and often more (if repeated // queries are required to wait for completion). - let mgs_clients = MgsClients::from_clients([ + let mut mgs_clients = MgsClients::from_clients([ gateway_client::Client::new( &format!("http://{failing_mgs_addr}"), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), @@ -203,7 +205,7 @@ async fn test_rot_updater_remembers_successful_mgs_instance() { &mgstestctx.logctx.log, ); - rot_updater.update(mgs_clients).await.expect("update failed"); + rot_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] .last_rot_update_data() @@ -295,7 +297,7 @@ async fn test_rot_updater_switches_mgs_instances_on_failure() { reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); // Configure two MGS clients pointed at our two proxy tasks. - let mgs_clients = MgsClients::from_clients([ + let mut mgs_clients = MgsClients::from_clients([ gateway_client::Client::new_with_client( &format!("http://{mgs_proxy_one_addr}"), client.clone(), @@ -324,7 +326,8 @@ async fn test_rot_updater_switches_mgs_instances_on_failure() { ); // Spawn the actual update task. - let mut update_task = tokio::spawn(rot_updater.update(mgs_clients)); + let mut update_task = + tokio::spawn(async move { rot_updater.update(&mut mgs_clients).await }); // Loop over incoming requests. We expect this sequence: // @@ -447,10 +450,11 @@ async fn test_rot_updater_delivers_progress() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); let sp_type = SpType::Sled; let sp_slot = 0; @@ -483,10 +487,11 @@ async fn test_rot_updater_delivers_progress() { // Spawn the update on a background task so we can watch `progress` as it is // applied. - let do_update_task = tokio::spawn(rot_updater.update(mgs_clients)); + let do_update_task = + tokio::spawn(async move { rot_updater.update(&mut mgs_clients).await }); // Allow the SP to respond to 1 message: the "prepare update" messages that - // trigger the start of an update, then ensure we see the "started" + // triggers the start of an update, then ensure we see the "started" // progress. 
sp_accept_sema.send(1).unwrap(); progress.changed().await.unwrap(); @@ -556,7 +561,6 @@ async fn test_rot_updater_delivers_progress() { UpdateStatus::Complete(_) => { if prev_bytes_received < rot_image_len { prev_bytes_received = rot_image_len; - continue; } } status @ (UpdateStatus::None diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs index 89735ac3d9..1b6764e609 100644 --- a/nexus/tests/integration_tests/sp_updater.rs +++ b/nexus/tests/integration_tests/sp_updater.rs @@ -46,10 +46,11 @@ async fn test_sp_updater_updates_sled() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); // Configure and instantiate an `SpUpdater`. let sp_type = SpType::Sled; @@ -66,7 +67,7 @@ async fn test_sp_updater_updates_sled() { ); // Run the update. - sp_updater.update(mgs_clients).await.expect("update failed"); + sp_updater.update(&mut mgs_clients).await.expect("update failed"); // Ensure the SP received the complete update. let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] @@ -96,10 +97,11 @@ async fn test_sp_updater_updates_switch() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); let sp_type = SpType::Switch; let sp_slot = 0; @@ -114,7 +116,7 @@ async fn test_sp_updater_updates_switch() { &mgstestctx.logctx.log, ); - sp_updater.update(mgs_clients).await.expect("update failed"); + sp_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.sidecars[sp_slot as usize] .last_sp_update_data() @@ -174,7 +176,7 @@ async fn test_sp_updater_remembers_successful_mgs_instance() { // delivering an update requires a bare minimum of three requests (start the // update, query the status, reset the SP) and often more (if repeated // queries are required to wait for completion). - let mgs_clients = MgsClients::from_clients([ + let mut mgs_clients = MgsClients::from_clients([ gateway_client::Client::new( &format!("http://{failing_mgs_addr}"), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), @@ -198,7 +200,7 @@ async fn test_sp_updater_remembers_successful_mgs_instance() { &mgstestctx.logctx.log, ); - sp_updater.update(mgs_clients).await.expect("update failed"); + sp_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] .last_sp_update_data() @@ -290,7 +292,7 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() { reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); // Configure two MGS clients pointed at our two proxy tasks. 
- let mgs_clients = MgsClients::from_clients([ + let mut mgs_clients = MgsClients::from_clients([ gateway_client::Client::new_with_client( &format!("http://{mgs_proxy_one_addr}"), client.clone(), @@ -317,7 +319,8 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() { ); // Spawn the actual update task. - let mut update_task = tokio::spawn(sp_updater.update(mgs_clients)); + let mut update_task = + tokio::spawn(async move { sp_updater.update(&mut mgs_clients).await }); // Loop over incoming requests. We expect this sequence: // @@ -436,10 +439,11 @@ async fn test_sp_updater_delivers_progress() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); let sp_type = SpType::Sled; let sp_slot = 0; @@ -470,10 +474,11 @@ async fn test_sp_updater_delivers_progress() { // Spawn the update on a background task so we can watch `progress` as it is // applied. - let do_update_task = tokio::spawn(sp_updater.update(mgs_clients)); + let do_update_task = + tokio::spawn(async move { sp_updater.update(&mut mgs_clients).await }); // Allow the SP to respond to 2 messages: the caboose check and the "prepare - update" messages that trigger the start of an update, then ensure we see + update" message that triggers the start of an update, then ensure we see // the "started" progress. sp_accept_sema.send(2).unwrap(); progress.changed().await.unwrap(); @@ -543,7 +548,6 @@ async fn test_sp_updater_delivers_progress() { UpdateStatus::Complete(_) => { if prev_bytes_received < sp_image_len { prev_bytes_received = sp_image_len; - continue; } } status @ (UpdateStatus::None diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index 635e8fde6b..5cfad94c86 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -123,6 +123,15 @@ impl SimulatedSp for Gimlet { handler.update_state.last_rot_update_data() } + async fn last_host_phase1_update_data( + &self, + slot: u16, + ) -> Option<Box<[u8]>> { + let handler = self.handler.as_ref()?; + let handler = handler.lock().await; + handler.update_state.last_host_phase1_update_data(slot) + } + async fn current_update_status(&self) -> gateway_messages::UpdateStatus { let Some(handler) = self.handler.as_ref() else { return gateway_messages::UpdateStatus::None; @@ -1188,7 +1197,7 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result<u16, SpError> { - warn!( + debug!( &self.log, "asked for component active slot"; "sender" => %sender, "port" => ?port, @@ -1211,7 +1220,7 @@ impl SpHandler for Handler { slot: u16, persist: bool, ) -> Result<(), SpError> { - warn!( + debug!( &self.log, "asked to set component active slot"; "sender" => %sender, "port" => ?port, @@ -1222,9 +1231,12 @@ impl SpHandler for Handler { if component == SpComponent::ROT { self.rot_active_slot = rot_slot_id_from_u16(slot)?; Ok(()) + } else if component == SpComponent::HOST_CPU_BOOT_FLASH { + self.update_state.set_active_host_slot(slot); + Ok(()) } else { // The real SP returns `RequestUnsupportedForComponent` for anything - // other than the RoT, including SP_ITSELF. + // other than the RoT and host boot flash, including SP_ITSELF.
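// ----------------------------------------------------------------------
// [Editor's sketch -- not part of the patch.] The branch added above, as a
// self-contained shape: active-slot writes are now accepted for the host
// boot flash as well as the RoT, and everything else is still rejected the
// way the real SP rejects it. `Component` and `SpError` here are reduced
// stand-ins for the gateway-messages types (the real code also converts
// the RoT slot through `rot_slot_id_from_u16`).
enum Component {
    Rot,
    HostCpuBootFlash,
    SpItself,
}

#[derive(Debug)]
enum SpError {
    RequestUnsupportedForComponent,
}

#[derive(Default)]
struct UpdateState {
    active_host_slot: Option<u16>,
}

fn component_set_active_slot(
    rot_active_slot: &mut u16,
    update_state: &mut UpdateState,
    component: Component,
    slot: u16,
) -> Result<(), SpError> {
    match component {
        Component::Rot => {
            *rot_active_slot = slot;
            Ok(())
        }
        Component::HostCpuBootFlash => {
            update_state.active_host_slot = Some(slot);
            Ok(())
        }
        // Everything else (including the SP itself) stays unsupported.
        Component::SpItself => Err(SpError::RequestUnsupportedForComponent),
    }
}
// ----------------------------------------------------------------------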
Err(SpError::RequestUnsupportedForComponent) } } diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 0958e8a177..87643af9a8 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -68,6 +68,12 @@ pub trait SimulatedSp { /// Only returns data after a simulated reset of the RoT. async fn last_rot_update_data(&self) -> Option<Box<[u8]>>; + /// Get the last completed update delivered to the host phase1 flash slot. + async fn last_host_phase1_update_data( + &self, + slot: u16, + ) -> Option<Box<[u8]>>; + /// Get the current update status, just as would be returned by an MGS /// request to get the update status. async fn current_update_status(&self) -> gateway_messages::UpdateStatus; diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index 19e84ffc64..1bd6fe4964 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -134,6 +134,14 @@ impl SimulatedSp for Sidecar { handler.update_state.last_rot_update_data() } + async fn last_host_phase1_update_data( + &self, + _slot: u16, + ) -> Option<Box<[u8]>> { + // sidecars do not have attached hosts + None + } + async fn current_update_status(&self) -> gateway_messages::UpdateStatus { let Some(handler) = self.handler.as_ref() else { return gateway_messages::UpdateStatus::None; diff --git a/sp-sim/src/update.rs b/sp-sim/src/update.rs index 9879a3ecde..0efa730a26 100644 --- a/sp-sim/src/update.rs +++ b/sp-sim/src/update.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use std::collections::BTreeMap; use std::io::Cursor; use std::mem; @@ -15,6 +16,8 @@ pub(crate) struct SimSpUpdate { state: UpdateState, last_sp_update_data: Option<Box<[u8]>>, last_rot_update_data: Option<Box<[u8]>>, + last_host_phase1_update_data: BTreeMap<u16, Box<[u8]>>, + active_host_slot: Option<u16>, } impl Default for SimSpUpdate { @@ -23,6 +26,13 @@ impl Default for SimSpUpdate { state: UpdateState::NotPrepared, last_sp_update_data: None, last_rot_update_data: None, + last_host_phase1_update_data: BTreeMap::new(), + + // In the real SP, there is always _some_ active host slot. We could + // emulate that by always defaulting to slot 0, but instead we'll + // ensure any tests that expect to read or write a particular slot + // set that slot as active first. + active_host_slot: None, } } } @@ -43,9 +53,20 @@ impl SimSpUpdate { UpdateState::NotPrepared | UpdateState::Aborted(_) | UpdateState::Completed { .. } => { + let slot = if component == SpComponent::HOST_CPU_BOOT_FLASH { + match self.active_host_slot { + Some(slot) => slot, + None => return Err(SpError::InvalidSlotForComponent), + } + } else { + // We don't manage SP or RoT slots, so just use 0 + 0 + }; + self.state = UpdateState::Prepared { component, id, + slot, data: Cursor::new(vec![0u8; total_size].into_boxed_slice()), }; Ok(()) @@ -63,7 +84,7 @@ impl SimSpUpdate { chunk_data: &[u8], ) -> Result<(), SpError> { match &mut self.state { - UpdateState::Prepared { component, id, data } => { + UpdateState::Prepared { component, id, slot, data } => { // Ensure that the update ID and target component are correct.
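// ----------------------------------------------------------------------
// [Editor's sketch -- not part of the patch.] How the new `slot` plumbing
// above fits together end to end: a test picks an active host slot, the
// prepare step captures it (or fails if none was set), and when the final
// chunk lands the finished image is filed under that slot for later
// inspection. Names mirror the patch but the types are simplified.
use std::collections::BTreeMap;

#[derive(Default)]
struct HostPhase1State {
    active_host_slot: Option<u16>,
    last_host_phase1_update_data: BTreeMap<u16, Box<[u8]>>,
}

impl HostPhase1State {
    // Tests must call this before preparing a host phase-1 update; the
    // simulator deliberately has no default active slot.
    fn set_active_host_slot(&mut self, slot: u16) {
        self.active_host_slot = Some(slot);
    }

    // Prepare-time check, mirroring the `InvalidSlotForComponent` error.
    fn slot_for_update(&self) -> Result<u16, &'static str> {
        self.active_host_slot.ok_or("no active host slot set")
    }

    // Completion-time bookkeeping: remember the finished image per slot.
    fn record_completed(&mut self, slot: u16, data: Box<[u8]>) {
        self.last_host_phase1_update_data.insert(slot, data);
    }

    fn last_host_phase1_update_data(&self, slot: u16) -> Option<Box<[u8]>> {
        self.last_host_phase1_update_data.get(&slot).cloned()
    }
}
// ----------------------------------------------------------------------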
if chunk.id != *id || chunk.component != *component { return Err(SpError::InvalidUpdateId { sp_update_id: *id }); @@ -84,10 +105,17 @@ impl SimSpUpdate { if data.position() == data.get_ref().len() as u64 { let mut stolen = Cursor::new(Box::default()); mem::swap(data, &mut stolen); + let data = stolen.into_inner(); + + if *component == SpComponent::HOST_CPU_BOOT_FLASH { + self.last_host_phase1_update_data + .insert(*slot, data.clone()); + } + self.state = UpdateState::Completed { component: *component, id: *id, - data: stolen.into_inner(), + data, }; } @@ -150,6 +178,17 @@ impl SimSpUpdate { pub(crate) fn last_rot_update_data(&self) -> Option<Box<[u8]>> { self.last_rot_update_data.clone() } + + pub(crate) fn last_host_phase1_update_data( + &self, + slot: u16, + ) -> Option<Box<[u8]>> { + self.last_host_phase1_update_data.get(&slot).cloned() + } + + pub(crate) fn set_active_host_slot(&mut self, slot: u16) { + self.active_host_slot = Some(slot); + } } enum UpdateState { @@ -157,6 +196,7 @@ Prepared { component: SpComponent, id: UpdateId, + slot: u16, // data would ordinarily be a Cursor<Vec<u8>>, but that can grow and // reallocate. We want to ensure that we don't receive any more data // than originally promised, so use a Cursor<Box<[u8]>> to ensure that From b6ebaaad31e376fe6c64ff2b9b54e612fddfd91a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 30 Nov 2023 10:00:53 -0800 Subject: [PATCH 036/186] Bump openssl from 0.10.57 to 0.10.60 (#4569) --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6580e1de55..5ccaa2c3d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5099,9 +5099,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.57" +version = "0.10.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" dependencies = [ "bitflags 2.4.0", "cfg-if 1.0.0", @@ -5131,9 +5131,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.93" +version = "0.9.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" dependencies = [ "cc", "libc", From 7ef3631ed95830a0120fe7832d0a88e2155b2613 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 30 Nov 2023 10:01:20 -0800 Subject: [PATCH 037/186] Update Rust crate url to 2.5.0 (#4584) --- Cargo.lock | 18 +++++++++--------- tufaceous-lib/Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5ccaa2c3d1..358ec1f9b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2276,9 +2276,9 @@ checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -3040,9 +3040,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.4.0" +version = "0.5.0" source =
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -5631,9 +5631,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" @@ -9106,12 +9106,12 @@ dependencies = [ [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", - "idna 0.4.0", + "idna 0.5.0", "percent-encoding", ] diff --git a/tufaceous-lib/Cargo.toml b/tufaceous-lib/Cargo.toml index 0df3a33f98..aa9a26e3bb 100644 --- a/tufaceous-lib/Cargo.toml +++ b/tufaceous-lib/Cargo.toml @@ -33,7 +33,7 @@ tar.workspace = true tokio.workspace = true toml.workspace = true tough.workspace = true -url = "2.4.1" +url = "2.5.0" zip.workspace = true omicron-workspace-hack.workspace = true From e3887d54e959401a169b40ae88fa9a675e2bdfff Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 30 Nov 2023 12:55:19 -0800 Subject: [PATCH 038/186] `ServiceZoneRequest` is too general (#4466) --- Cargo.lock | 1 + clients/sled-agent-client/Cargo.toml | 1 + clients/sled-agent-client/src/lib.rs | 26 +- common/src/api/external/mod.rs | 1 + common/src/ledger.rs | 1 + openapi/sled-agent.json | 1033 +++--- schema/all-zone-requests.json | 11 +- schema/all-zones-requests.json | 632 ++++ ...ice-plan.json => rss-service-plan-v2.json} | 518 +-- .../src/bin/services-ledger-check-migrate.rs | 80 + sled-agent/src/bootstrap/params.rs | 11 + sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/http_entrypoints.rs | 53 +- sled-agent/src/lib.rs | 3 +- sled-agent/src/params.rs | 955 +++-- sled-agent/src/rack_setup/plan/service.rs | 423 ++- sled-agent/src/rack_setup/service.rs | 572 ++- sled-agent/src/services.rs | 3205 ++++++++++------- sled-agent/src/services_migration.rs | 624 ++++ sled-agent/src/sled_agent.rs | 41 +- .../old-service-ledgers/rack2-sled10.json | 1 + .../old-service-ledgers/rack2-sled11.json | 1 + .../old-service-ledgers/rack2-sled12.json | 1 + .../old-service-ledgers/rack2-sled14.json | 1 + .../old-service-ledgers/rack2-sled16.json | 1 + .../old-service-ledgers/rack2-sled17.json | 1 + .../old-service-ledgers/rack2-sled21.json | 1 + .../old-service-ledgers/rack2-sled23.json | 1 + .../old-service-ledgers/rack2-sled25.json | 1 + .../old-service-ledgers/rack2-sled8.json | 1 + .../old-service-ledgers/rack2-sled9.json | 1 + .../old-service-ledgers/rack3-sled0.json | 1 + .../old-service-ledgers/rack3-sled1.json | 1 + .../old-service-ledgers/rack3-sled11.json | 1 + .../old-service-ledgers/rack3-sled12.json | 1 + .../old-service-ledgers/rack3-sled13.json | 1 + .../old-service-ledgers/rack3-sled14.json | 1 + .../old-service-ledgers/rack3-sled15.json | 1 + .../old-service-ledgers/rack3-sled16.json | 1 + .../old-service-ledgers/rack3-sled17.json | 1 + .../old-service-ledgers/rack3-sled18.json | 1 + .../old-service-ledgers/rack3-sled19.json | 1 + .../old-service-ledgers/rack3-sled2.json | 1 + 
.../old-service-ledgers/rack3-sled20.json | 1 + .../old-service-ledgers/rack3-sled21.json | 1 + .../old-service-ledgers/rack3-sled22.json | 1 + .../old-service-ledgers/rack3-sled23.json | 1 + .../old-service-ledgers/rack3-sled24.json | 1 + .../old-service-ledgers/rack3-sled25.json | 1 + .../old-service-ledgers/rack3-sled26.json | 1 + .../old-service-ledgers/rack3-sled27.json | 1 + .../old-service-ledgers/rack3-sled28.json | 1 + .../old-service-ledgers/rack3-sled29.json | 1 + .../old-service-ledgers/rack3-sled3.json | 1 + .../old-service-ledgers/rack3-sled30.json | 1 + .../old-service-ledgers/rack3-sled31.json | 1 + .../old-service-ledgers/rack3-sled4.json | 1 + .../old-service-ledgers/rack3-sled5.json | 1 + .../old-service-ledgers/rack3-sled6.json | 1 + .../old-service-ledgers/rack3-sled7.json | 1 + .../old-service-ledgers/rack3-sled8.json | 1 + .../old-service-ledgers/rack3-sled9.json | 1 + .../new-zones-ledgers/rack2-sled10.json | 195 + .../new-zones-ledgers/rack2-sled11.json | 196 + .../new-zones-ledgers/rack2-sled12.json | 232 ++ .../new-zones-ledgers/rack2-sled14.json | 192 + .../new-zones-ledgers/rack2-sled16.json | 192 + .../new-zones-ledgers/rack2-sled17.json | 181 + .../new-zones-ledgers/rack2-sled21.json | 232 ++ .../new-zones-ledgers/rack2-sled23.json | 195 + .../new-zones-ledgers/rack2-sled25.json | 196 + .../output/new-zones-ledgers/rack2-sled8.json | 198 + .../output/new-zones-ledgers/rack2-sled9.json | 192 + .../output/new-zones-ledgers/rack3-sled0.json | 181 + .../output/new-zones-ledgers/rack3-sled1.json | 167 + .../new-zones-ledgers/rack3-sled11.json | 201 ++ .../new-zones-ledgers/rack3-sled12.json | 181 + .../new-zones-ledgers/rack3-sled13.json | 201 ++ .../new-zones-ledgers/rack3-sled14.json | 198 + .../new-zones-ledgers/rack3-sled15.json | 196 + .../new-zones-ledgers/rack3-sled16.json | 167 + .../new-zones-ledgers/rack3-sled17.json | 167 + .../new-zones-ledgers/rack3-sled18.json | 167 + .../new-zones-ledgers/rack3-sled19.json | 181 + .../output/new-zones-ledgers/rack3-sled2.json | 167 + .../new-zones-ledgers/rack3-sled20.json | 198 + .../new-zones-ledgers/rack3-sled21.json | 167 + .../new-zones-ledgers/rack3-sled22.json | 167 + .../new-zones-ledgers/rack3-sled23.json | 181 + .../new-zones-ledgers/rack3-sled24.json | 167 + .../new-zones-ledgers/rack3-sled25.json | 196 + .../new-zones-ledgers/rack3-sled26.json | 178 + .../new-zones-ledgers/rack3-sled27.json | 167 + .../new-zones-ledgers/rack3-sled28.json | 167 + .../new-zones-ledgers/rack3-sled29.json | 184 + .../output/new-zones-ledgers/rack3-sled3.json | 178 + .../new-zones-ledgers/rack3-sled30.json | 167 + .../new-zones-ledgers/rack3-sled31.json | 181 + .../output/new-zones-ledgers/rack3-sled4.json | 167 + .../output/new-zones-ledgers/rack3-sled5.json | 178 + .../output/new-zones-ledgers/rack3-sled6.json | 167 + .../output/new-zones-ledgers/rack3-sled7.json | 167 + .../output/new-zones-ledgers/rack3-sled8.json | 198 + .../output/new-zones-ledgers/rack3-sled9.json | 178 + 104 files changed, 12950 insertions(+), 3013 deletions(-) create mode 100644 schema/all-zones-requests.json rename schema/{rss-service-plan.json => rss-service-plan-v2.json} (80%) create mode 100644 sled-agent/src/bin/services-ledger-check-migrate.rs create mode 100644 sled-agent/src/services_migration.rs create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled10.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled11.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled12.json create mode 100644 
sled-agent/tests/old-service-ledgers/rack2-sled14.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled16.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled17.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled21.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled23.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled25.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled8.json create mode 100644 sled-agent/tests/old-service-ledgers/rack2-sled9.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled0.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled1.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled11.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled12.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled13.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled14.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled15.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled16.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled17.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled18.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled19.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled2.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled20.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled21.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled22.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled23.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled24.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled25.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled26.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled27.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled28.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled29.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled3.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled30.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled31.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled4.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled5.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled6.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled7.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled8.json create mode 100644 sled-agent/tests/old-service-ledgers/rack3-sled9.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled10.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled11.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled12.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled14.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled16.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled17.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled21.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled23.json create mode 100644 
sled-agent/tests/output/new-zones-ledgers/rack2-sled25.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled8.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack2-sled9.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled0.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled1.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled11.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled12.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled13.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled14.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled15.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled16.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled17.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled18.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled19.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled2.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled20.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled21.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled22.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled23.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled24.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled25.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled26.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled27.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled28.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled29.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled3.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled30.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled31.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled4.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled5.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled6.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled7.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled8.json create mode 100644 sled-agent/tests/output/new-zones-ledgers/rack3-sled9.json diff --git a/Cargo.lock b/Cargo.lock index 358ec1f9b4..b730cbda97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7538,6 +7538,7 @@ dependencies = [ "progenitor", "regress", "reqwest", + "schemars", "serde", "sled-storage", "slog", diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml index e2cc737e70..18ca342a2b 100644 --- a/clients/sled-agent-client/Cargo.toml +++ b/clients/sled-agent-client/Cargo.toml @@ -12,6 +12,7 @@ progenitor.workspace = true ipnetwork.workspace = true regress.workspace = true reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] } +schemars.workspace = true serde.workspace = true slog.workspace = true sled-storage.workspace = true diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 30b554a021..0bbd27cf3e 100644 --- a/clients/sled-agent-client/src/lib.rs +++ 
b/clients/sled-agent-client/src/lib.rs @@ -6,11 +6,11 @@ use async_trait::async_trait; use std::convert::TryFrom; -use std::str::FromStr; use uuid::Uuid; progenitor::generate_api!( spec = "../../openapi/sled-agent.json", + derives = [ schemars::JsonSchema ], inner_type = slog::Logger, pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { slog::debug!(log, "client request"; @@ -529,27 +529,3 @@ impl TestInterfaces for Client { .expect("disk_finish_transition() failed unexpectedly"); } } - -impl From<sled_storage::dataset::DatasetKind> for types::DatasetKind { - fn from(k: sled_storage::dataset::DatasetKind) -> Self { - use sled_storage::dataset::DatasetKind::*; - match k { - CockroachDb => Self::CockroachDb, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl From<sled_storage::dataset::DatasetName> for types::DatasetName { - fn from(n: sled_storage::dataset::DatasetName) -> Self { - Self { - pool_name: types::ZpoolName::from_str(&n.pool().to_string()) - .unwrap(), - kind: n.dataset().clone().into(), - } - } -} diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 3e58d1d4d4..db5272cd6e 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -622,6 +622,7 @@ impl From<ByteCount> for i64 { Debug, Deserialize, Eq, + Hash, JsonSchema, Ord, PartialEq, diff --git a/common/src/ledger.rs b/common/src/ledger.rs index ae028998e2..c120ab953c 100644 --- a/common/src/ledger.rs +++ b/common/src/ledger.rs @@ -54,6 +54,7 @@ impl From<Error> for crate::api::external::Error { /// /// This structure is intended to help with serialization and deserialization /// of configuration information to both M.2s. +#[derive(Debug)] pub struct Ledger<T: Ledgerable> { log: Logger, ledger: T, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 22216b9571..6a0d692e99 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -377,14 +377,35 @@ } } }, - "/services": { + "/omicron-zones": { + "get": { + "operationId": "omicron_zones_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OmicronZonesConfig" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, "put": { - "operationId": "services_put", + "operationId": "omicron_zones_put", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ServiceEnsureBody" + "$ref": "#/components/schemas/OmicronZonesConfig" } } }, @@ -2370,131 +2391,6 @@ "value" ] }, - "DatasetKind": { - "description": "The type of a dataset, and an auxiliary information necessary to successfully launch a zone managing the associated data.", - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "cockroach_db" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "crucible" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse_keeper" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "external_dns"
- ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "internal_dns" - ] - } - }, - "required": [ - "type" - ] - } - ] - }, - "DatasetName": { - "type": "object", - "properties": { - "kind": { - "$ref": "#/components/schemas/DatasetKind" - }, - "pool_name": { - "$ref": "#/components/schemas/ZpoolName" - } - }, - "required": [ - "kind", - "pool_name" - ] - }, - "DatasetRequest": { - "description": "Describes a request to provision a specific dataset", - "type": "object", - "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "name": { - "$ref": "#/components/schemas/DatasetName" - }, - "service_address": { - "type": "string" - } - }, - "required": [ - "id", - "name", - "service_address" - ] - }, "Datum": { "description": "A `Datum` is a single sampled data point from a metric.", "oneOf": [ @@ -5027,372 +4923,317 @@ } ] }, - "PortConfigV1": { + "OmicronZoneConfig": { + "description": "Describes one Omicron-managed zone running on a sled", "type": "object", "properties": { - "addresses": { - "description": "This port's addresses.", - "type": "array", - "items": { - "$ref": "#/components/schemas/IpNetwork" - } - }, - "autoneg": { - "description": "Whether or not to set autonegotiation", - "type": "boolean" - }, - "bgp_peers": { - "description": "BGP peers on this port", - "type": "array", - "items": { - "$ref": "#/components/schemas/BgpPeerConfig" - } - }, - "port": { - "description": "Nmae of the port this config applies to.", - "type": "string" - }, - "routes": { - "description": "The set of routes associated with this port.", - "type": "array", - "items": { - "$ref": "#/components/schemas/RouteConfig" - } - }, - "switch": { - "description": "Switch the port belongs to.", - "allOf": [ - { - "$ref": "#/components/schemas/SwitchLocation" - } - ] + "id": { + "type": "string", + "format": "uuid" }, - "uplink_port_fec": { - "description": "Port forward error correction type.", - "allOf": [ - { - "$ref": "#/components/schemas/PortFec" - } - ] + "underlay_address": { + "type": "string", + "format": "ipv6" }, - "uplink_port_speed": { - "description": "Port speed.", - "allOf": [ - { - "$ref": "#/components/schemas/PortSpeed" - } - ] + "zone_type": { + "$ref": "#/components/schemas/OmicronZoneType" } }, "required": [ - "addresses", - "autoneg", - "bgp_peers", - "port", - "routes", - "switch", - "uplink_port_fec", - "uplink_port_speed" - ] - }, - "PortFec": { - "description": "Switchport FEC options", - "type": "string", - "enum": [ - "firecode", - "none", - "rs" + "id", + "underlay_address", + "zone_type" ] }, - "PortSpeed": { - "description": "Switchport Speed options", - "type": "string", - "enum": [ - "speed0_g", - "speed1_g", - "speed10_g", - "speed25_g", - "speed40_g", - "speed50_g", - "speed100_g", - "speed200_g", - "speed400_g" + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "properties": { + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + "pool_name" ] }, - "PriorityDimension": { - "description": "A dimension along with bundles can be sorted, to determine priority.", - "oneOf": [ - { - "description": "Sorting by time, with older bundles with lower priority.", - "type": "string", - "enum": [ - "time" - ] - }, - { - "description": "Sorting by the cause for creating the bundle.", - "type": "string", - "enum": [ - "cause" - ] - } - ] - }, - "PriorityOrder": { - "description": 
"The priority order for bundles during cleanup.\n\nBundles are sorted along the dimensions in [`PriorityDimension`], with each dimension appearing exactly once. During cleanup, lesser-priority bundles are pruned first, to maintain the dataset quota. Note that bundles are sorted by each dimension in the order in which they appear, with each dimension having higher priority than the next.", - "type": "array", - "items": { - "$ref": "#/components/schemas/PriorityDimension" - }, - "minItems": 2, - "maxItems": 2 - }, - "ProducerResultsItem": { + "OmicronZoneType": { + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", "oneOf": [ { "type": "object", "properties": { - "info": { + "address": { + "type": "string" + }, + "dns_servers": { "type": "array", "items": { - "$ref": "#/components/schemas/Sample" + "type": "string", + "format": "ip" } }, - "status": { + "domain": { + "nullable": true, + "type": "string" + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "snat_cfg": { + "description": "The SNAT configuration for outbound connections.", + "allOf": [ + { + "$ref": "#/components/schemas/SourceNatConfig" + } + ] + }, + "type": { "type": "string", "enum": [ - "ok" + "boundary_ntp" ] } }, "required": [ - "info", - "status" + "address", + "dns_servers", + "nic", + "ntp_servers", + "snat_cfg", + "type" ] }, { "type": "object", "properties": { - "info": { - "$ref": "#/components/schemas/MetricsError" + "address": { + "type": "string" }, - "status": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { "type": "string", "enum": [ - "err" + "clickhouse" ] } }, "required": [ - "info", - "status" + "address", + "dataset", + "type" ] - } - ] - }, - "QuantizationError": { - "description": "Errors occurring during quantizated bin generation.", - "oneOf": [ + }, { "type": "object", "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, "type": { "type": "string", "enum": [ - "overflow" + "clickhouse_keeper" ] } }, "required": [ + "address", + "dataset", "type" ] }, { "type": "object", "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, "type": { "type": "string", "enum": [ - "precision" + "cockroach_db" ] } }, "required": [ + "address", + "dataset", "type" ] }, { "type": "object", "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, "type": { "type": "string", "enum": [ - "invalid_base" + "crucible" ] } }, "required": [ + "address", + "dataset", "type" ] }, { "type": "object", "properties": { + "address": { + "type": "string" + }, "type": { "type": "string", "enum": [ - "invalid_steps" + "crucible_pantry" ] } }, "required": [ + "address", "type" ] }, { "type": "object", "properties": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "type": "string" + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external 
connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, "type": { "type": "string", "enum": [ - "uneven_steps_for_base" + "external_dns" ] } }, "required": [ + "dataset", + "dns_address", + "http_address", + "nic", "type" ] }, { "type": "object", "properties": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "http_address": { + "type": "string" + }, "type": { "type": "string", "enum": [ - "powers_out_of_order" + "internal_dns" ] } }, "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", "type" ] - } - ] - }, - "RackNetworkConfigV1": { - "description": "Initial network configuration", - "type": "object", - "properties": { - "bgp": { - "description": "BGP configurations for connecting the rack to external networks", - "type": "array", - "items": { - "$ref": "#/components/schemas/BgpConfig" - } }, - "infra_ip_first": { - "description": "First ip address to be used for configuring network infrastructure", - "type": "string", - "format": "ipv4" - }, - "infra_ip_last": { - "description": "Last ip address to be used for configuring network infrastructure", - "type": "string", - "format": "ipv4" - }, - "ports": { - "description": "Uplinks for connecting the rack to external networks", - "type": "array", - "items": { - "$ref": "#/components/schemas/PortConfigV1" - } - }, - "rack_subnet": { - "$ref": "#/components/schemas/Ipv6Network" - } - }, - "required": [ - "bgp", - "infra_ip_first", - "infra_ip_last", - "ports", - "rack_subnet" - ] - }, - "RouteConfig": { - "type": "object", - "properties": { - "destination": { - "description": "The destination of the route.", - "allOf": [ - { - "$ref": "#/components/schemas/IpNetwork" - } - ] - }, - "nexthop": { - "description": "The nexthop/gateway address.", - "type": "string", - "format": "ip" - } - }, - "required": [ - "destination", - "nexthop" - ] - }, - "Sample": { - "description": "A concrete type representing a single, timestamped measurement from a timeseries.", - "type": "object", - "properties": { - "measurement": { - "description": "The measured value of the metric at this sample", - "allOf": [ - { - "$ref": "#/components/schemas/Measurement" + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "nullable": true, + "type": "string" + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] } + }, + "required": [ + "address", + "dns_servers", + "ntp_servers", + "type" ] }, - "metric": { - "$ref": "#/components/schemas/FieldSet" - }, - "target": { - "$ref": "#/components/schemas/FieldSet" - }, - "timeseries_name": { - "description": "The name of the timeseries this sample belongs to", - "type": "string" - } - }, - "required": [ - 
"measurement", - "metric", - "target", - "timeseries_name" - ] - }, - "SemverVersion": { - "type": "string", - "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" - }, - "ServiceEnsureBody": { - "description": "Used to request that the Sled initialize multiple services.", - "type": "object", - "properties": { - "services": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ServiceZoneRequest" - } - } - }, - "required": [ - "services" - ] - }, - "ServiceType": { - "description": "Describes service-specific parameters.", - "oneOf": [ { "type": "object", "properties": { @@ -5444,331 +5285,396 @@ { "type": "object", "properties": { - "dns_address": { - "description": "The address at which the external DNS server is reachable.", - "type": "string" - }, - "http_address": { - "description": "The address at which the external DNS server API is reachable.", + "address": { "type": "string" }, - "nic": { - "description": "The service vNIC providing external connectivity using OPTE.", - "allOf": [ - { - "$ref": "#/components/schemas/NetworkInterface" - } - ] - }, "type": { "type": "string", "enum": [ - "external_dns" + "oximeter" ] } }, "required": [ - "dns_address", - "http_address", - "nic", + "address", "type" ] + } + ] + }, + "OmicronZonesConfig": { + "description": "Describes the set of Omicron-managed zones running on a sled", + "type": "object", + "properties": { + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] }, - { - "type": "object", - "properties": { - "dns_address": { - "type": "string" - }, - "gz_address": { - "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", - "type": "string", - "format": "ipv6" - }, - "gz_address_index": { - "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", - "type": "integer", - "format": "uint32", - "minimum": 0 - }, - "http_address": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "internal_dns" - ] + "zones": { + "description": "list of running zones", + "type": "array", + "items": { + "$ref": "#/components/schemas/OmicronZoneConfig" + } + } + }, + "required": [ + "generation", + "zones" + ] + }, + "PortConfigV1": { + "type": "object", + "properties": { + "addresses": { + "description": "This port's addresses.", + "type": "array", + "items": { + "$ref": "#/components/schemas/IpNetwork" + } + }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, + "bgp_peers": { + "description": "BGP peers on this port", + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpPeerConfig" + } + }, + "port": { + "description": "Nmae of the port this config applies to.", + "type": "string" + }, + "routes": { + "description": "The set of routes associated with this port.", + 
"type": "array", + "items": { + "$ref": "#/components/schemas/RouteConfig" + } + }, + "switch": { + "description": "Switch the port belongs to.", + "allOf": [ + { + "$ref": "#/components/schemas/SwitchLocation" } - }, - "required": [ - "dns_address", - "gz_address", - "gz_address_index", - "http_address", - "type" ] }, + "uplink_port_fec": { + "description": "Port forward error correction type.", + "allOf": [ + { + "$ref": "#/components/schemas/PortFec" + } + ] + }, + "uplink_port_speed": { + "description": "Port speed.", + "allOf": [ + { + "$ref": "#/components/schemas/PortSpeed" + } + ] + } + }, + "required": [ + "addresses", + "autoneg", + "bgp_peers", + "port", + "routes", + "switch", + "uplink_port_fec", + "uplink_port_speed" + ] + }, + "PortFec": { + "description": "Switchport FEC options", + "type": "string", + "enum": [ + "firecode", + "none", + "rs" + ] + }, + "PortSpeed": { + "description": "Switchport Speed options", + "type": "string", + "enum": [ + "speed0_g", + "speed1_g", + "speed10_g", + "speed25_g", + "speed40_g", + "speed50_g", + "speed100_g", + "speed200_g", + "speed400_g" + ] + }, + "PriorityDimension": { + "description": "A dimension along with bundles can be sorted, to determine priority.", + "oneOf": [ + { + "description": "Sorting by time, with older bundles with lower priority.", + "type": "string", + "enum": [ + "time" + ] + }, + { + "description": "Sorting by the cause for creating the bundle.", + "type": "string", + "enum": [ + "cause" + ] + } + ] + }, + "PriorityOrder": { + "description": "The priority order for bundles during cleanup.\n\nBundles are sorted along the dimensions in [`PriorityDimension`], with each dimension appearing exactly once. During cleanup, lesser-priority bundles are pruned first, to maintain the dataset quota. 
Note that bundles are sorted by each dimension in the order in which they appear, with each dimension having higher priority than the next.", + "type": "array", + "items": { + "$ref": "#/components/schemas/PriorityDimension" + }, + "minItems": 2, + "maxItems": 2 + }, + "ProducerResultsItem": { + "oneOf": [ { "type": "object", "properties": { - "address": { - "type": "string" + "info": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Sample" + } }, - "type": { + "status": { "type": "string", "enum": [ - "oximeter" + "ok" ] } }, "required": [ - "address", - "type" + "info", + "status" ] }, { "type": "object", "properties": { - "address": { - "type": "string" + "info": { + "$ref": "#/components/schemas/MetricsError" }, - "type": { + "status": { "type": "string", "enum": [ - "crucible_pantry" + "err" ] } }, "required": [ - "address", - "type" + "info", + "status" ] - }, + } + ] + }, + "QuantizationError": { + "description": "Errors occurring during quantizated bin generation.", + "oneOf": [ { "type": "object", "properties": { - "address": { - "type": "string" - }, - "dns_servers": { - "type": "array", - "items": { - "type": "string", - "format": "ip" - } - }, - "domain": { - "nullable": true, - "type": "string" - }, - "nic": { - "description": "The service vNIC providing outbound connectivity using OPTE.", - "allOf": [ - { - "$ref": "#/components/schemas/NetworkInterface" - } - ] - }, - "ntp_servers": { - "type": "array", - "items": { - "type": "string" - } - }, - "snat_cfg": { - "description": "The SNAT configuration for outbound connections.", - "allOf": [ - { - "$ref": "#/components/schemas/SourceNatConfig" - } - ] - }, "type": { "type": "string", "enum": [ - "boundary_ntp" + "overflow" ] } }, "required": [ - "address", - "dns_servers", - "nic", - "ntp_servers", - "snat_cfg", "type" ] }, { "type": "object", "properties": { - "address": { - "type": "string" - }, - "dns_servers": { - "type": "array", - "items": { - "type": "string", - "format": "ip" - } - }, - "domain": { - "nullable": true, - "type": "string" - }, - "ntp_servers": { - "type": "array", - "items": { - "type": "string" - } - }, "type": { "type": "string", "enum": [ - "internal_ntp" + "precision" ] } }, "required": [ - "address", - "dns_servers", - "ntp_servers", "type" ] }, { "type": "object", "properties": { - "address": { - "type": "string" - }, "type": { "type": "string", "enum": [ - "clickhouse" + "invalid_base" ] } }, "required": [ - "address", "type" ] }, { "type": "object", "properties": { - "address": { - "type": "string" - }, "type": { "type": "string", "enum": [ - "clickhouse_keeper" + "invalid_steps" ] } }, "required": [ - "address", "type" ] }, { "type": "object", "properties": { - "address": { - "type": "string" - }, "type": { "type": "string", "enum": [ - "cockroach_db" + "uneven_steps_for_base" ] } }, "required": [ - "address", "type" ] }, { "type": "object", "properties": { - "address": { - "type": "string" - }, "type": { "type": "string", "enum": [ - "crucible" + "powers_out_of_order" ] } }, "required": [ - "address", "type" ] } ] }, - "ServiceZoneRequest": { - "description": "Describes a request to create a zone running one or more services.", + "RackNetworkConfigV1": { + "description": "Initial network configuration", "type": "object", "properties": { - "addresses": { + "bgp": { + "description": "BGP configurations for connecting the rack to external networks", "type": "array", "items": { - "type": "string", - "format": "ipv6" + "$ref": "#/components/schemas/BgpConfig" } }, - "dataset": { - 
"nullable": true, - "default": null, - "allOf": [ - { - "$ref": "#/components/schemas/DatasetRequest" - } - ] + "infra_ip_first": { + "description": "First ip address to be used for configuring network infrastructure", + "type": "string", + "format": "ipv4" }, - "id": { + "infra_ip_last": { + "description": "Last ip address to be used for configuring network infrastructure", "type": "string", - "format": "uuid" + "format": "ipv4" }, - "services": { + "ports": { + "description": "Uplinks for connecting the rack to external networks", "type": "array", "items": { - "$ref": "#/components/schemas/ServiceZoneService" + "$ref": "#/components/schemas/PortConfigV1" } }, - "zone_type": { - "$ref": "#/components/schemas/ZoneType" + "rack_subnet": { + "$ref": "#/components/schemas/Ipv6Network" } }, "required": [ - "addresses", - "id", - "services", - "zone_type" + "bgp", + "infra_ip_first", + "infra_ip_last", + "ports", + "rack_subnet" ] }, - "ServiceZoneService": { - "description": "Used to request that the Sled initialize a single service.", + "RouteConfig": { "type": "object", "properties": { - "details": { - "$ref": "#/components/schemas/ServiceType" + "destination": { + "description": "The destination of the route.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNetwork" + } + ] }, - "id": { + "nexthop": { + "description": "The nexthop/gateway address.", "type": "string", - "format": "uuid" + "format": "ip" } }, "required": [ - "details", - "id" + "destination", + "nexthop" + ] + }, + "Sample": { + "description": "A concrete type representing a single, timestamped measurement from a timeseries.", + "type": "object", + "properties": { + "measurement": { + "description": "The measured value of the metric at this sample", + "allOf": [ + { + "$ref": "#/components/schemas/Measurement" + } + ] + }, + "metric": { + "$ref": "#/components/schemas/FieldSet" + }, + "target": { + "$ref": "#/components/schemas/FieldSet" + }, + "timeseries_name": { + "description": "The name of the timeseries this sample belongs to", + "type": "string" + } + }, + "required": [ + "measurement", + "metric", + "target", + "timeseries_name" ] }, + "SemverVersion": { + "type": "string", + "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" + }, "SetVirtualNetworkInterfaceHost": { "description": "A mapping from a virtual NIC to a physical host", "type": "object", @@ -6434,23 +6340,6 @@ "version" ] }, - "ZoneType": { - "description": "The type of zone which may be requested from Sled Agent", - "type": "string", - "enum": [ - "clickhouse", - "clickhouse_keeper", - "cockroach_db", - "crucible_pantry", - "crucible", - "external_dns", - "internal_dns", - "nexus", - "ntp", - "oximeter", - "switch" - ] - }, "Zpool": { "type": "object", "properties": { diff --git a/schema/all-zone-requests.json b/schema/all-zone-requests.json index 468f00ee0c..4eb56d379d 100644 --- a/schema/all-zone-requests.json +++ b/schema/all-zone-requests.json @@ -1,6 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "AllZoneRequests", + "description": "A wrapper around `ZoneRequest` that allows it to be serialized to a JSON file.", "type": "object", "required": [ "generation", @@ -8,7 +9,12 @@ ], "properties": { "generation": { - "$ref": "#/definitions/Generation" + "description": "ledger generation (not an Omicron-provided generation)", + "allOf": [ + { + "$ref": "#/definitions/Generation" 
+ } + ] }, "requests": { "type": "array", @@ -719,6 +725,7 @@ "minimum": 0.0 }, "ZoneRequest": { + "description": "This struct represents the combo of \"what zone did you ask for\" + \"where did we put it\".", "type": "object", "required": [ "root", @@ -734,7 +741,7 @@ } }, "ZoneType": { - "description": "The type of zone which may be requested from Sled Agent", + "description": "The type of zone that Sled Agent may run", "type": "string", "enum": [ "clickhouse", diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json new file mode 100644 index 0000000000..0e43e9ee21 --- /dev/null +++ b/schema/all-zones-requests.json @@ -0,0 +1,632 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OmicronZonesConfigLocal", + "description": "Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus wants for all of its zones) with the locally-determined configuration for these zones.", + "type": "object", + "required": [ + "ledger_generation", + "omicron_generation", + "zones" + ], + "properties": { + "ledger_generation": { + "description": "ledger-managed generation number\n\nThis generation is managed by the ledger facility itself. It's bumped whenever we write a new ledger. In practice, we don't currently have any reason to bump this _for a given Omicron generation_ so it's somewhat redundant. In principle, if we needed to modify the ledgered configuration due to some event that doesn't change the Omicron config (e.g., if we wanted to move the root filesystem to a different path), we could do that by bumping this generation.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + }, + "omicron_generation": { + "description": "generation of the Omicron-provided part of the configuration\n\nThis generation number is outside of Sled Agent's control. We store exactly what we were given and use this number to decide when to fail requests to establish an outdated configuration.\n\nYou can think of this as a major version number, with `ledger_generation` being a minor version number. 
See `is_newer_than()`.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + }, + "zones": { + "type": "array", + "items": { + "$ref": "#/definitions/OmicronZoneConfigLocal" + } + } + }, + "definitions": { + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "IpNet": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/definitions/Ipv4Net" + } + ] + }, + { + "title": "v6", + "allOf": [ + { + "$ref": "#/definitions/Ipv6Net" + } + ] + } + ] + }, + "Ipv4Net": { + "title": "An IPv4 subnet", + "description": "An IPv4 subnet, including prefix and subnet mask", + "examples": [ + "192.168.1.0/24" + ], + "type": "string", + "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$" + }, + "Ipv6Net": { + "title": "An IPv6 subnet", + "description": "An IPv6 subnet, including prefix and subnet mask", + "examples": [ + "fd12:3456::/64" + ], + "type": "string", + "pattern": "^([fF][dD])[0-9a-fA-F]{2}:(([0-9a-fA-F]{1,4}:){6}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,6}:)([0-9a-fA-F]{1,4})?\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$" + }, + "MacAddr": { + "title": "A MAC address", + "description": "A Media Access Control address, in EUI-48 format", + "examples": [ + "ff:ff:ff:ff:ff:ff" + ], + "type": "string", + "maxLength": 17, + "minLength": 5, + "pattern": "^([0-9a-fA-F]{0,2}:){5}[0-9a-fA-F]{0,2}$" + }, + "Name": { + "title": "A name unique within the parent collection", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", + "type": "string", + "maxLength": 63, + "minLength": 1, + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$" + }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/definitions/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/definitions/MacAddr" + }, + "name": { + "$ref": "#/definitions/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + }, + "subnet": { + "$ref": "#/definitions/IpNet" + }, + "vni": { + "$ref": "#/definitions/Vni" + } + } + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "instance" + ] + } + } + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + } + } + ] + }, + "OmicronZoneConfig": { + "description": "Describes one Omicron-managed zone running on a sled", + "type": "object", + "required": [ + "id", + "underlay_address", + "zone_type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/definitions/OmicronZoneType" + } + } + }, + "OmicronZoneConfigLocal": { + "description": "Combines the Nexus-provided `OmicronZoneConfig` (which describes what Nexus wants for this zone) with any locally-determined configuration (like the path to the root filesystem)", + "type": "object", + "required": [ + "root", + "zone" + ], + "properties": { + "root": { + "type": "string" + }, + "zone": { + "$ref": "#/definitions/OmicronZoneConfig" + } + } + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "required": [ + "pool_name" + ], + "properties": { + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "OmicronZoneType": { + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", + "oneOf": [ + { + "type": "object", + "required": [ + "address", + "dns_servers", + "nic", + "ntp_servers", + "snat_cfg", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "snat_cfg": { + "description": "The SNAT configuration for outbound connections.", + "allOf": [ + { + "$ref": 
"#/definitions/SourceNatConfig" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "http_address", + "nic", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "type": "string" + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "http_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dns_servers", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] + } + } + }, + { + "type": "object", + 
"required": [ + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", + "type" + ], + "properties": { + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "external_ip": { + "description": "The address at which the external nexus server is reachable.", + "type": "string", + "format": "ip" + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + } + } + ] + }, + "SourceNatConfig": { + "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", + "type": "object", + "required": [ + "first_port", + "ip", + "last_port" + ], + "properties": { + "first_port": { + "description": "The first port used for source NAT, inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "ip": { + "description": "The external address provided to the instance or service.", + "type": "string", + "format": "ip" + }, + "last_port": { + "description": "The last port used for source NAT, also inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. 
They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + } + } +} \ No newline at end of file diff --git a/schema/rss-service-plan.json b/schema/rss-service-plan-v2.json similarity index 80% rename from schema/rss-service-plan.json rename to schema/rss-service-plan-v2.json index 725caf0900..0bcd27b9cc 100644 --- a/schema/rss-service-plan.json +++ b/schema/rss-service-plan-v2.json @@ -13,136 +13,11 @@ "services": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/SledRequest" + "$ref": "#/definitions/SledConfig" } } }, "definitions": { - "DatasetKind": { - "description": "The type of a dataset, and an auxiliary information necessary to successfully launch a zone managing the associated data.", - "oneOf": [ - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "cockroach_db" - ] - } - } - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "crucible" - ] - } - } - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse" - ] - } - } - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse_keeper" - ] - } - } - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "external_dns" - ] - } - } - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "internal_dns" - ] - } - } - } - ] - }, - "DatasetName": { - "type": "object", - "required": [ - "kind", - "pool_name" - ], - "properties": { - "kind": { - "$ref": "#/definitions/DatasetKind" - }, - "pool_name": { - "$ref": "#/definitions/ZpoolName" - } - } - }, - "DatasetRequest": { - "description": "Describes a request to provision a specific dataset", - "type": "object", - "required": [ - "id", - "name", - "service_address" - ], - "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "name": { - "$ref": "#/definitions/DatasetName" - }, - "service_address": { - "type": "string" - } - } - }, "DnsConfigParams": { "type": "object", "required": [ @@ -399,53 +274,96 @@ } ] }, - "ServiceType": { - "description": "Describes service-specific parameters.", + "OmicronZoneConfig": { + "description": "Describes one Omicron-managed zone running on a sled", + "type": "object", + "required": [ + "id", + "underlay_address", + "zone_type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/definitions/OmicronZoneType" + } + } + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "required": [ + "pool_name" + ], + "properties": { + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "OmicronZoneType": { + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", "oneOf": [ { "type": "object", "required": [ - "external_dns_servers", - "external_ip", - "external_tls", - "internal_address", + "address", + "dns_servers", "nic", + "ntp_servers", + "snat_cfg", "type" ], "properties": { - "external_dns_servers": 
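The `ZpoolName` pattern in the ledger schema above is strict enough to check mechanically: an `oxi_` or `oxp_` prefix followed by a lowercase v4 UUID. A quick sketch of a validator using the regex crate; the helper name is ours, not something in the tree:

    use regex::Regex;

    // Pattern copied verbatim from the schema's ZpoolName definition:
    // "oxi_"/"oxp_" followed by a lowercase v4 UUID.
    fn is_valid_zpool_name(name: &str) -> bool {
        let re = Regex::new(
            r"^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
        )
        .unwrap();
        re.is_match(name)
    }

    fn main() {
        assert!(is_valid_zpool_name("oxp_0b4b5b3a-8b89-4d0c-9a1e-2f1d6c9e7a5b"));
        assert!(!is_valid_zpool_name("oxz_not-a-pool")); // wrong prefix, no UUID
    }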
{ - "description": "External DNS servers Nexus can use to resolve external hosts.", + "address": { + "type": "string" + }, + "dns_servers": { "type": "array", "items": { "type": "string", "format": "ip" } }, - "external_ip": { - "description": "The address at which the external nexus server is reachable.", - "type": "string", - "format": "ip" - }, - "external_tls": { - "description": "Whether Nexus's external endpoint should use TLS", - "type": "boolean" - }, - "internal_address": { - "description": "The address at which the internal nexus server is reachable.", - "type": "string" + "domain": { + "type": [ + "string", + "null" + ] }, "nic": { - "description": "The service vNIC providing external connectivity using OPTE.", + "description": "The service vNIC providing outbound connectivity using OPTE.", "allOf": [ { "$ref": "#/definitions/NetworkInterface" } ] }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "snat_cfg": { + "description": "The SNAT configuration for outbound connections.", + "allOf": [ + { + "$ref": "#/definitions/SourceNatConfig" + } + ] + }, "type": { "type": "string", "enum": [ - "nexus" + "boundary_ntp" ] } } @@ -453,32 +371,21 @@ { "type": "object", "required": [ - "dns_address", - "http_address", - "nic", + "address", + "dataset", "type" ], "properties": { - "dns_address": { - "description": "The address at which the external DNS server is reachable.", + "address": { "type": "string" }, - "http_address": { - "description": "The address at which the external DNS server API is reachable.", - "type": "string" - }, - "nic": { - "description": "The service vNIC providing external connectivity using OPTE.", - "allOf": [ - { - "$ref": "#/definitions/NetworkInterface" - } - ] + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" }, "type": { "type": "string", "enum": [ - "external_dns" + "clickhouse" ] } } @@ -486,34 +393,43 @@ { "type": "object", "required": [ - "dns_address", - "gz_address", - "gz_address_index", - "http_address", + "address", + "dataset", "type" ], "properties": { - "dns_address": { + "address": { "type": "string" }, - "gz_address": { - "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", - "type": "string", - "format": "ipv6" - }, - "gz_address_index": { - "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", - "type": "integer", - "format": "uint32", - "minimum": 0.0 + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" }, - "http_address": { + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { "type": "string" }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, "type": { "type": "string", "enum": [ - "internal_dns" + "cockroach_db" ] } } @@ -522,16 +438,20 @@ "type": "object", "required": [ "address", + "dataset", "type" ], "properties": { "address": { "type": "string" }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, "type": { "type": "string", "enum": [ - "oximeter" + "crucible" ] } } @@ -557,56 +477,75 @@ { "type": "object", "required": [ - "address", - "dns_servers", + "dataset", + "dns_address", + "http_address", "nic", - "ntp_servers", - "snat_cfg", "type" 
], "properties": { - "address": { - "type": "string" + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" }, - "dns_servers": { - "type": "array", - "items": { - "type": "string", - "format": "ip" - } + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "type": "string" }, - "domain": { - "type": [ - "string", - "null" - ] + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" }, "nic": { - "description": "The service vNIC providing outbound connectivity using OPTE.", + "description": "The service vNIC providing external connectivity using OPTE.", "allOf": [ { "$ref": "#/definitions/NetworkInterface" } ] }, - "ntp_servers": { - "type": "array", - "items": { - "type": "string" - } - }, - "snat_cfg": { - "description": "The SNAT configuration for outbound connections.", - "allOf": [ - { - "$ref": "#/definitions/SourceNatConfig" - } + "type": { + "type": "string", + "enum": [ + "external_dns" ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "http_address": { + "type": "string" }, "type": { "type": "string", "enum": [ - "boundary_ntp" + "internal_dns" ] } } @@ -653,53 +592,47 @@ { "type": "object", "required": [ - "address", + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", "type" ], "properties": { - "address": { - "type": "string" + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } }, - "type": { + "external_ip": { + "description": "The address at which the external nexus server is reachable.", "type": "string", - "enum": [ - "clickhouse" - ] - } - } - }, - { - "type": "object", - "required": [ - "address", - "type" - ], - "properties": { - "address": { + "format": "ip" + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", "type": "string" }, - "type": { - "type": "string", - "enum": [ - "clickhouse_keeper" + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } ] - } - } - }, - { - "type": "object", - "required": [ - "address", - "type" - ], - "properties": { - "address": { - "type": "string" }, "type": { "type": "string", "enum": [ - "cockroach_db" + "nexus" ] } } @@ -717,82 +650,24 @@ "type": { "type": "string", "enum": [ - "crucible" + "oximeter" ] } } } ] }, - "ServiceZoneRequest": { - "description": "Describes a request to create a zone running one or more services.", - 
"type": "object", - "required": [ - "addresses", - "id", - "services", - "zone_type" - ], - "properties": { - "addresses": { - "type": "array", - "items": { - "type": "string", - "format": "ipv6" - } - }, - "dataset": { - "default": null, - "anyOf": [ - { - "$ref": "#/definitions/DatasetRequest" - }, - { - "type": "null" - } - ] - }, - "id": { - "type": "string", - "format": "uuid" - }, - "services": { - "type": "array", - "items": { - "$ref": "#/definitions/ServiceZoneService" - } - }, - "zone_type": { - "$ref": "#/definitions/ZoneType" - } - } - }, - "ServiceZoneService": { - "description": "Used to request that the Sled initialize a single service.", + "SledConfig": { "type": "object", "required": [ - "details", - "id" + "zones" ], "properties": { - "details": { - "$ref": "#/definitions/ServiceType" - }, - "id": { - "type": "string", - "format": "uuid" - } - } - }, - "SledRequest": { - "type": "object", - "properties": { - "service": { - "description": "Services to be instantiated.", - "default": [], + "zones": { + "description": "zones configured for this sled", "type": "array", "items": { - "$ref": "#/definitions/ServiceZoneRequest" + "$ref": "#/definitions/OmicronZoneConfig" } } } @@ -860,23 +735,6 @@ "format": "uint32", "minimum": 0.0 }, - "ZoneType": { - "description": "The type of zone which may be requested from Sled Agent", - "type": "string", - "enum": [ - "clickhouse", - "clickhouse_keeper", - "cockroach_db", - "crucible_pantry", - "crucible", - "external_dns", - "internal_dns", - "nexus", - "ntp", - "oximeter", - "switch" - ] - }, "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", diff --git a/sled-agent/src/bin/services-ledger-check-migrate.rs b/sled-agent/src/bin/services-ledger-check-migrate.rs new file mode 100644 index 0000000000..456fdc74b7 --- /dev/null +++ b/sled-agent/src/bin/services-ledger-check-migrate.rs @@ -0,0 +1,80 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Test-migrates one or more old-format services ledger files to new-format +//! 
Omicron zones ledgers + +use anyhow::Context; +use camino::Utf8PathBuf; +use clap::Args; +use clap::Parser; +use omicron_common::cmd::fatal; +use omicron_common::cmd::CmdError; +use omicron_sled_agent::services::OmicronZonesConfigLocal; +use omicron_sled_agent::services_migration::AllZoneRequests; + +#[tokio::main] +async fn main() { + if let Err(message) = do_run().await { + fatal(CmdError::Failure(message)); + } +} + +#[derive(Debug, Parser)] +#[clap(about = "Test conversion of old-format services ledgers to new-format \ + zones ledgers")] +enum Converter { + /// checks whether one or more ledger file(s) can be converted successfully + Check(CheckArgs), + + /// for a given ledger file, prints the converted form + Show(ShowArgs), +} + +#[derive(Debug, Args)] +struct CheckArgs { + #[clap(action)] + files: Vec, +} + +#[derive(Debug, Args)] +struct ShowArgs { + #[clap(action)] + file: Utf8PathBuf, +} + +async fn do_run() -> Result<(), anyhow::Error> { + let args = Converter::parse(); + + let (files, do_show) = match args { + Converter::Check(CheckArgs { files }) => (files, false), + Converter::Show(ShowArgs { file }) => (vec![file], true), + }; + + for file_path in &files { + let contents = tokio::fs::read_to_string(file_path) + .await + .with_context(|| format!("read {:?}", &file_path))?; + let parsed: AllZoneRequests = serde_json::from_str(&contents) + .with_context(|| format!("parse {:?}", &file_path))?; + let converted = OmicronZonesConfigLocal::try_from(parsed) + .with_context(|| format!("convert contents of {:?}", &file_path))?; + if do_show { + println!( + "{:#}", + serde_json::to_string_pretty(&converted).with_context( + || format!("print contents of {:?}", &file_path) + )? + ); + } + eprintln!( + "{}: processed okay (zones: {})", + file_path, + converted.zones.len() + ); + } + + eprintln!("all files processed okay (files: {})", files.len()); + Ok(()) +} diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index ab85915dc1..79189e7f49 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -347,6 +347,17 @@ pub(super) mod version { pub(crate) const V1: u32 = 1; } +#[cfg(test)] +pub fn test_config() -> RackInitializeRequest { + let manifest = std::env::var("CARGO_MANIFEST_DIR") + .expect("Cannot access manifest directory"); + let manifest = camino::Utf8PathBuf::from(manifest); + let path = manifest.join("../smf/sled-agent/non-gimlet/config-rss.toml"); + let contents = std::fs::read_to_string(&path).unwrap(); + toml::from_str(&contents) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)) +} + #[cfg(test)] mod tests { use std::net::Ipv6Addr; diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index f4948de83b..999e4cc0c8 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -259,7 +259,7 @@ impl Server { // we're responsible for, while continuing to handle hardware // notifications. This cannot fail: we retry indefinitely until // we're done loading services. 
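The converter above leans entirely on a `TryFrom` impl between the two ledger formats (`AllZoneRequests` into `OmicronZonesConfigLocal`), which keeps the CLI down to parse, convert, and optionally pretty-print. The same shape in miniature, with stand-in `OldLedger`/`NewLedger` types rather than the real ones (assumes serde with the derive feature and serde_json):

    use serde::{Deserialize, Serialize};

    #[derive(Deserialize)]
    struct OldLedger { services: Vec<String> }

    #[derive(Serialize)]
    struct NewLedger { zones: Vec<String> }

    impl TryFrom<OldLedger> for NewLedger {
        type Error = String;
        fn try_from(old: OldLedger) -> Result<Self, Self::Error> {
            // The real conversion can fail on unmigratable entries; this
            // stand-in never does.
            Ok(NewLedger { zones: old.services })
        }
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let contents = r#"{ "services": ["oximeter"] }"#;
        let parsed: OldLedger = serde_json::from_str(contents)?;
        let converted = NewLedger::try_from(parsed)?;
        println!("{}", serde_json::to_string_pretty(&converted)?);
        Ok(())
    }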
- sled_agent.cold_boot_load_services().await; + sled_agent.load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { SledAgentState::Bootstrapping( diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 2d0e2c4001..9c3a079dac 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -10,7 +10,7 @@ use crate::bootstrap::params::AddSledRequest; use crate::params::{ CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody, InstancePutStateBody, - InstancePutStateResponse, InstanceUnregisterResponse, ServiceEnsureBody, + InstancePutStateResponse, InstanceUnregisterResponse, OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRulesEnsureBody, ZoneBundleId, ZoneBundleMetadata, Zpool, }; @@ -51,7 +51,8 @@ pub fn api() -> SledApiDescription { api.register(instance_put_state)?; api.register(instance_register)?; api.register(instance_unregister)?; - api.register(services_put)?; + api.register(omicron_zones_get)?; + api.register(omicron_zones_put)?; api.register(zones_list)?; api.register(zone_bundle_list)?; api.register(zone_bundle_list_all)?; @@ -315,44 +316,28 @@ async fn zones_list( sa.zones_list().await.map(HttpResponseOk).map_err(HttpError::from) } +#[endpoint { + method = GET, + path = "/omicron-zones", +}] +async fn omicron_zones_get( + rqctx: RequestContext, +) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await?)) +} + #[endpoint { method = PUT, - path = "/services", + path = "/omicron-zones", }] -async fn services_put( +async fn omicron_zones_put( rqctx: RequestContext, - body: TypedBody, + body: TypedBody, ) -> Result { - let sa = rqctx.context().clone(); + let sa = rqctx.context(); let body_args = body.into_inner(); - - // Spawn a separate task to run `services_ensure`: cancellation of this - // endpoint's future (as might happen if the client abandons the request or - // times out) could result in leaving zones partially configured and the - // in-memory state of the service manager invalid. See: - // oxidecomputer/omicron#3098. - let handler = async move { - match sa.services_ensure(body_args).await { - Ok(()) => Ok(()), - Err(e) => { - // Log the error here to make things clear even if the client - // has already disconnected. 
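The comment in the removed handler describes a pattern worth remembering even though the new `omicron_zones_put` awaits `omicron_zones_ensure` inline: dropshot cancels an endpoint's future when the client disconnects, so work that must run to completion gets detached onto its own task. A minimal form of that shield, assuming tokio with a multi-threaded runtime; the helper name is ours:

    // Run `fut` on a detached tokio task so that dropping the returned
    // future (e.g. on client disconnect) no longer cancels the work.
    async fn run_to_completion<F, T>(fut: F) -> T
    where
        F: std::future::Future<Output = T> + Send + 'static,
        T: Send + 'static,
    {
        tokio::spawn(fut).await.expect("task panicked")
    }

Dropping the future returned here only abandons the `JoinHandle`; the spawned task keeps running, which is exactly the property the old `services_put` relied on.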
- error!(sa.logger(), "failed to initialize services: {e}"); - Err(e) - } - } - }; - match tokio::spawn(handler).await { - Ok(result) => result.map_err(|e| Error::from(e))?, - - Err(e) => { - return Err(HttpError::for_internal_error(format!( - "unexpected failure awaiting \"services_ensure\": {:#}", - e - ))); - } - } - + sa.omicron_zones_ensure(body_args).await?; Ok(HttpResponseUpdatedNoContent()) } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 924fd4bd92..d77ec7a3c0 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -32,7 +32,8 @@ pub mod params; mod profile; pub mod rack_setup; pub mod server; -mod services; +pub mod services; +pub mod services_migration; mod sled_agent; mod smf_helper; mod storage_monitor; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index b22bd84975..6be2ceabbd 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,6 +9,8 @@ pub use crate::zone_bundle::ZoneBundleMetadata; pub use illumos_utils::opte::params::DhcpConfig; pub use illumos_utils::opte::params::VpcFirewallRule; pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; +use illumos_utils::zpool::ZpoolName; +use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::{ DiskRuntimeState, InstanceProperties, InstanceRuntimeState, SledInstanceState, VmmRuntimeState, @@ -18,13 +20,13 @@ use omicron_common::api::internal::shared::{ }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use sled_hardware::Baseboard; pub use sled_hardware::DendriteAsic; +use sled_storage::dataset::DatasetKind; use sled_storage::dataset::DatasetName; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::str::FromStr; use std::time::Duration; -use thiserror::Error; use uuid::Uuid; /// Used to request a Disk state change @@ -229,252 +231,7 @@ pub struct Zpool { pub disk_type: DiskType, } -/// Describes service-specific parameters. -#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ServiceType { - Nexus { - /// The address at which the internal nexus server is reachable. - internal_address: SocketAddrV6, - /// The address at which the external nexus server is reachable. - external_ip: IpAddr, - /// The service vNIC providing external connectivity using OPTE. - nic: NetworkInterface, - /// Whether Nexus's external endpoint should use TLS - external_tls: bool, - /// External DNS servers Nexus can use to resolve external hosts. - external_dns_servers: Vec, - }, - ExternalDns { - /// The address at which the external DNS server API is reachable. - http_address: SocketAddrV6, - /// The address at which the external DNS server is reachable. - dns_address: SocketAddr, - /// The service vNIC providing external connectivity using OPTE. - nic: NetworkInterface, - }, - InternalDns { - http_address: SocketAddrV6, - dns_address: SocketAddrV6, - /// The addresses in the global zone which should be created - /// - /// For the DNS service, which exists outside the sleds's typical subnet - adding an - /// address in the GZ is necessary to allow inter-zone traffic routing. - gz_address: Ipv6Addr, - - /// The address is also identified with an auxiliary bit of information - /// to ensure that the created global zone address can have a unique name. 
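The new endpoint pair is also easy to exercise by hand. A hedged sketch using reqwest (with its "json" feature) plus serde_json; the base URL and the JSON body shape are assumptions read off the handler signatures above, and the generated `sled_agent_client` is of course the real consumer:

    async fn roundtrip(base: &str) -> Result<(), reqwest::Error> {
        let client = reqwest::Client::new();

        // PUT the desired zone set; generation 1 is reserved for "no zones".
        let body = serde_json::json!({ "generation": 2, "zones": [] });
        client
            .put(format!("{base}/omicron-zones"))
            .json(&body)
            .send()
            .await?
            .error_for_status()?;

        // GET reads back the currently ledgered configuration.
        let current: serde_json::Value = client
            .get(format!("{base}/omicron-zones"))
            .send()
            .await?
            .json()
            .await?;
        println!("{current:#}");
        Ok(())
    }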
- gz_address_index: u32, - }, - Oximeter { - address: SocketAddrV6, - }, - // We should never receive external requests to start wicketd, MGS, sp-sim - // dendrite, tfport, or maghemite: these are all services running in the - // global zone or switch zone that we start autonomously. We tag them with - // `serde(skip)` both to omit them from our OpenAPI definition and to avoid - // needing their contained types to implement `JsonSchema + Deserialize + - // Serialize`. - #[serde(skip)] - ManagementGatewayService, - #[serde(skip)] - Wicketd { - baseboard: Baseboard, - }, - #[serde(skip)] - Dendrite { - asic: DendriteAsic, - }, - #[serde(skip)] - Tfport { - pkt_source: String, - asic: DendriteAsic, - }, - #[serde(skip)] - Uplink, - #[serde(skip)] - MgDdm { - mode: String, - }, - #[serde(skip)] - Mgd, - #[serde(skip)] - SpSim, - CruciblePantry { - address: SocketAddrV6, - }, - BoundaryNtp { - address: SocketAddrV6, - ntp_servers: Vec, - dns_servers: Vec, - domain: Option, - /// The service vNIC providing outbound connectivity using OPTE. - nic: NetworkInterface, - /// The SNAT configuration for outbound connections. - snat_cfg: SourceNatConfig, - }, - InternalNtp { - address: SocketAddrV6, - ntp_servers: Vec, - dns_servers: Vec, - domain: Option, - }, - Clickhouse { - address: SocketAddrV6, - }, - ClickhouseKeeper { - address: SocketAddrV6, - }, - CockroachDb { - address: SocketAddrV6, - }, - Crucible { - address: SocketAddrV6, - }, -} - -impl std::fmt::Display for ServiceType { - fn fmt(&self, f: &mut Formatter<'_>) -> FormatResult { - match self { - ServiceType::Nexus { .. } => write!(f, "nexus"), - ServiceType::ExternalDns { .. } => write!(f, "external_dns"), - ServiceType::InternalDns { .. } => write!(f, "internal_dns"), - ServiceType::Oximeter { .. } => write!(f, "oximeter"), - ServiceType::ManagementGatewayService => write!(f, "mgs"), - ServiceType::Wicketd { .. } => write!(f, "wicketd"), - ServiceType::Dendrite { .. } => write!(f, "dendrite"), - ServiceType::Tfport { .. } => write!(f, "tfport"), - ServiceType::Uplink { .. } => write!(f, "uplink"), - ServiceType::CruciblePantry { .. } => write!(f, "crucible/pantry"), - ServiceType::BoundaryNtp { .. } - | ServiceType::InternalNtp { .. } => write!(f, "ntp"), - ServiceType::MgDdm { .. } => write!(f, "mg-ddm"), - ServiceType::Mgd => write!(f, "mgd"), - ServiceType::SpSim => write!(f, "sp-sim"), - ServiceType::Clickhouse { .. } => write!(f, "clickhouse"), - ServiceType::ClickhouseKeeper { .. } => { - write!(f, "clickhouse_keeper") - } - ServiceType::CockroachDb { .. } => write!(f, "cockroachdb"), - ServiceType::Crucible { .. } => write!(f, "crucible"), - } - } -} - -impl crate::smf_helper::Service for ServiceType { - fn service_name(&self) -> String { - self.to_string() - } - fn smf_name(&self) -> String { - format!("svc:/oxide/{}", self.service_name()) - } - fn should_import(&self) -> bool { - true - } -} - -/// Error returned by attempting to convert an internal service (i.e., a service -/// started autonomously by sled-agent) into a -/// `sled_agent_client::types::ServiceType` to be sent to a remote sled-agent. 
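Both the removed `smf_helper::Service` impl above and its replacement later in this patch derive the SMF FMRI mechanically from the service name. The scheme in one line, with the crucible-pantry special case as the usage example:

    // FMRI scheme used by the smf_helper trait: svc:/oxide/<service name>.
    fn smf_name(service_name: &str) -> String {
        format!("svc:/oxide/{service_name}")
    }

    fn main() {
        assert_eq!(smf_name("crucible/pantry"), "svc:/oxide/crucible/pantry");
    }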
-#[derive(Debug, Clone, Copy, Error)] -#[error("This service may only be started autonomously by sled-agent")] -pub struct AutonomousServiceOnlyError; - -impl TryFrom for sled_agent_client::types::ServiceType { - type Error = AutonomousServiceOnlyError; - - fn try_from(s: ServiceType) -> Result { - use sled_agent_client::types::ServiceType as AutoSt; - use ServiceType as St; - - match s { - St::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers, - } => Ok(AutoSt::Nexus { - internal_address: internal_address.to_string(), - external_ip, - nic: nic.into(), - external_tls, - external_dns_servers, - }), - St::ExternalDns { http_address, dns_address, nic } => { - Ok(AutoSt::ExternalDns { - http_address: http_address.to_string(), - dns_address: dns_address.to_string(), - nic: nic.into(), - }) - } - St::InternalDns { - http_address, - dns_address, - gz_address, - gz_address_index, - } => Ok(AutoSt::InternalDns { - http_address: http_address.to_string(), - dns_address: dns_address.to_string(), - gz_address, - gz_address_index, - }), - St::Oximeter { address } => { - Ok(AutoSt::Oximeter { address: address.to_string() }) - } - St::CruciblePantry { address } => { - Ok(AutoSt::CruciblePantry { address: address.to_string() }) - } - St::BoundaryNtp { - address, - ntp_servers, - dns_servers, - domain, - nic, - snat_cfg, - } => Ok(AutoSt::BoundaryNtp { - address: address.to_string(), - ntp_servers, - dns_servers, - domain, - nic: nic.into(), - snat_cfg: snat_cfg.into(), - }), - St::InternalNtp { address, ntp_servers, dns_servers, domain } => { - Ok(AutoSt::InternalNtp { - address: address.to_string(), - ntp_servers, - dns_servers, - domain, - }) - } - St::Clickhouse { address } => { - Ok(AutoSt::Clickhouse { address: address.to_string() }) - } - St::ClickhouseKeeper { address } => { - Ok(AutoSt::ClickhouseKeeper { address: address.to_string() }) - } - St::CockroachDb { address } => { - Ok(AutoSt::CockroachDb { address: address.to_string() }) - } - St::Crucible { address } => { - Ok(AutoSt::Crucible { address: address.to_string() }) - } - St::ManagementGatewayService - | St::SpSim - | St::Wicketd { .. } - | St::Dendrite { .. } - | St::Tfport { .. } - | St::Uplink - | St::Mgd - | St::MgDdm { .. } => Err(AutonomousServiceOnlyError), - } - } -} - -/// The type of zone which may be requested from Sled Agent +/// The type of zone that Sled Agent may run #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] @@ -493,24 +250,6 @@ pub enum ZoneType { Switch, } -impl From for sled_agent_client::types::ZoneType { - fn from(zt: ZoneType) -> Self { - match zt { - ZoneType::Clickhouse => Self::Clickhouse, - ZoneType::ClickhouseKeeper => Self::ClickhouseKeeper, - ZoneType::CockroachDb => Self::CockroachDb, - ZoneType::Crucible => Self::Crucible, - ZoneType::CruciblePantry => Self::CruciblePantry, - ZoneType::InternalDns => Self::InternalDns, - ZoneType::ExternalDns => Self::ExternalDns, - ZoneType::Nexus => Self::Nexus, - ZoneType::Ntp => Self::Ntp, - ZoneType::Oximeter => Self::Oximeter, - ZoneType::Switch => Self::Switch, - } - } -} - impl std::fmt::Display for ZoneType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use ZoneType::*; @@ -531,280 +270,516 @@ impl std::fmt::Display for ZoneType { } } -/// Describes a request to provision a specific dataset +/// Generation 1 of `OmicronZonesConfig` is always the set of no zones. 
+pub const OMICRON_ZONES_CONFIG_INITIAL_GENERATION: u32 = 1; + +/// Describes the set of Omicron-managed zones running on a sled #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] -pub struct DatasetRequest { - pub id: Uuid, - pub name: DatasetName, - pub service_address: SocketAddrV6, -} - -impl From for sled_agent_client::types::DatasetRequest { - fn from(d: DatasetRequest) -> Self { +pub struct OmicronZonesConfig { + /// generation number of this configuration + /// + /// This generation number is owned by the control plane (i.e., RSS or + /// Nexus, depending on whether RSS-to-Nexus handoff has happened). It + /// should not be bumped within Sled Agent. + /// + /// Sled Agent rejects attempts to set the configuration to a generation + /// older than the one it's currently running. + pub generation: Generation, + + /// list of running zones + pub zones: Vec, +} + +impl From for sled_agent_client::types::OmicronZonesConfig { + fn from(local: OmicronZonesConfig) -> Self { Self { - id: d.id, - name: d.name.into(), - service_address: d.service_address.to_string(), + generation: local.generation.into(), + zones: local.zones.into_iter().map(|s| s.into()).collect(), } } } -/// Describes a request to create a zone running one or more services. +/// Describes one Omicron-managed zone running on a sled #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] -pub struct ServiceZoneRequest { - // The UUID of the zone to be initialized. - // TODO: Should this be removed? If we have UUIDs on the services, what's - // the point of this? +pub struct OmicronZoneConfig { pub id: Uuid, - // The type of the zone to be created. - pub zone_type: ZoneType, - // The addresses on which the service should listen for requests. - pub addresses: Vec, - // Datasets which should be managed by this service. - #[serde(default)] - pub dataset: Option, - // Services that should be run in the zone - pub services: Vec, -} - -impl ServiceZoneRequest { - // The full name of the zone, if it was to be created as a zone. - pub fn zone_name(&self) -> String { - illumos_utils::running_zone::InstalledZone::get_zone_name( - &self.zone_type.to_string(), - self.zone_name_unique_identifier(), - ) - } + pub underlay_address: Ipv6Addr, + pub zone_type: OmicronZoneType, +} - // The name of a unique identifier for the zone, if one is necessary. - pub fn zone_name_unique_identifier(&self) -> Option { - match &self.zone_type { - // The switch zone is necessarily a singleton. - ZoneType::Switch => None, - // All other zones should be identified by their zone UUID. - ZoneType::Clickhouse - | ZoneType::ClickhouseKeeper - | ZoneType::CockroachDb - | ZoneType::Crucible - | ZoneType::ExternalDns - | ZoneType::InternalDns - | ZoneType::Nexus - | ZoneType::CruciblePantry - | ZoneType::Ntp - | ZoneType::Oximeter => Some(self.id), +impl From for sled_agent_client::types::OmicronZoneConfig { + fn from(local: OmicronZoneConfig) -> Self { + Self { + id: local.id, + underlay_address: local.underlay_address, + zone_type: local.zone_type.into(), } } } -impl TryFrom - for sled_agent_client::types::ServiceZoneRequest -{ - type Error = AutonomousServiceOnlyError; +impl OmicronZoneConfig { + /// If this kind of zone has an associated dataset, returns the dataset's + /// name. Othrwise, returns `None`. 
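The doc comment on `generation` above pins down the concurrency contract: the control plane owns the counter, generation 1 means "no zones", and sled-agent must refuse configurations older than the one it already holds. Schematically (a hand-rolled stand-in, not the actual services.rs logic):

    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
    struct Generation(u64);

    struct ZonesLedger {
        generation: Generation,
    }

    impl ZonesLedger {
        // Accept a request only if it does not move the generation backwards.
        fn check_generation(&mut self, requested: Generation) -> Result<(), String> {
            if requested < self.generation {
                return Err(format!(
                    "requested generation {:?} is older than current {:?}",
                    requested, self.generation
                ));
            }
            self.generation = requested;
            Ok(())
        }
    }

The sketch glosses over what an equal generation means (the payload would have to match what is already running); the real implementation has to decide that case as well.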
+ pub fn dataset_name(&self) -> Option { + self.zone_type.dataset_name() + } - fn try_from(s: ServiceZoneRequest) -> Result { - let mut services = Vec::with_capacity(s.services.len()); - for service in s.services { - services.push(service.try_into()?); - } + /// If this kind of zone has an associated dataset, return the dataset's + /// name and the associated "service address". Otherwise, returns `None`. + pub fn dataset_name_and_address( + &self, + ) -> Option<(DatasetName, SocketAddrV6)> { + self.zone_type.dataset_name_and_address() + } - Ok(Self { - id: s.id, - zone_type: s.zone_type.into(), - addresses: s.addresses, - dataset: s.dataset.map(|d| d.into()), - services, - }) + /// Returns the name that is (or will be) used for the illumos zone + /// associated with this zone + pub fn zone_name(&self) -> String { + illumos_utils::running_zone::InstalledZone::get_zone_name( + &self.zone_type.zone_type_str(), + Some(self.id), + ) } -} -impl ServiceZoneRequest { - pub fn into_nexus_service_req( + /// Returns the structure that describes this zone to Nexus during rack + /// initialization + pub fn to_nexus_service_req( &self, sled_id: Uuid, - ) -> Result< - Vec, - AutonomousServiceOnlyError, - > { + ) -> nexus_client::types::ServicePutRequest { use nexus_client::types as NexusTypes; - let mut services = vec![]; - for svc in &self.services { - let service_id = svc.id; - let zone_id = Some(self.id); - match &svc.details { - ServiceType::Nexus { - external_ip, - internal_address, - nic, - .. - } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: internal_address.to_string(), - kind: NexusTypes::ServiceKind::Nexus { - external_address: *external_ip, - nic: NexusTypes::ServiceNic { - id: nic.id, - name: nic.name.clone(), - ip: nic.ip, - mac: nic.mac, - }, - }, - }); - } - ServiceType::ExternalDns { http_address, dns_address, nic } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: http_address.to_string(), - kind: NexusTypes::ServiceKind::ExternalDns { - external_address: dns_address.ip(), - nic: NexusTypes::ServiceNic { - id: nic.id, - name: nic.name.clone(), - ip: nic.ip, - mac: nic.mac, - }, - }, - }); - } - ServiceType::InternalDns { http_address, .. } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: http_address.to_string(), - kind: NexusTypes::ServiceKind::InternalDns, - }); + let service_id = self.id; + let zone_id = Some(self.id); + match &self.zone_type { + OmicronZoneType::Nexus { + external_ip, + internal_address, + nic, + .. + } => NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: internal_address.to_string(), + kind: NexusTypes::ServiceKind::Nexus { + external_address: *external_ip, + nic: NexusTypes::ServiceNic { + id: nic.id, + name: nic.name.clone(), + ip: nic.ip, + mac: nic.mac, + }, + }, + }, + OmicronZoneType::ExternalDns { + http_address, + dns_address, + nic, + .. + } => NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: http_address.to_string(), + kind: NexusTypes::ServiceKind::ExternalDns { + external_address: dns_address.ip(), + nic: NexusTypes::ServiceNic { + id: nic.id, + name: nic.name.clone(), + ip: nic.ip, + mac: nic.mac, + }, + }, + }, + OmicronZoneType::InternalDns { http_address, .. 
} => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: http_address.to_string(), + kind: NexusTypes::ServiceKind::InternalDns, } - ServiceType::Oximeter { address } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Oximeter, - }); + } + OmicronZoneType::Oximeter { address } => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::Oximeter, } - ServiceType::CruciblePantry { address } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::CruciblePantry, - }); + } + OmicronZoneType::CruciblePantry { address } => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::CruciblePantry, } - ServiceType::BoundaryNtp { address, snat_cfg, nic, .. } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::BoundaryNtp { - snat: snat_cfg.into(), - nic: NexusTypes::ServiceNic { - id: nic.id, - name: nic.name.clone(), - ip: nic.ip, - mac: nic.mac, - }, + } + OmicronZoneType::BoundaryNtp { address, snat_cfg, nic, .. } => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::BoundaryNtp { + snat: snat_cfg.into(), + nic: NexusTypes::ServiceNic { + id: nic.id, + name: nic.name.clone(), + ip: nic.ip, + mac: nic.mac, }, - }); + }, } - ServiceType::InternalNtp { address, .. } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::InternalNtp, - }); - } - ServiceType::Clickhouse { address } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Clickhouse, - }); + } + OmicronZoneType::InternalNtp { address, .. } => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::InternalNtp, } - ServiceType::ClickhouseKeeper { address } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::ClickhouseKeeper, - }); + } + OmicronZoneType::Clickhouse { address, .. } => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::Clickhouse, } - ServiceType::Crucible { address } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Crucible, - }); + } + OmicronZoneType::ClickhouseKeeper { address, .. } => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::ClickhouseKeeper, } - ServiceType::CockroachDb { address } => { - services.push(NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Cockroach, - }); + } + OmicronZoneType::Crucible { address, .. 
} => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::Crucible, } - ServiceType::ManagementGatewayService - | ServiceType::SpSim - | ServiceType::Wicketd { .. } - | ServiceType::Dendrite { .. } - | ServiceType::MgDdm { .. } - | ServiceType::Mgd - | ServiceType::Tfport { .. } - | ServiceType::Uplink => { - return Err(AutonomousServiceOnlyError); + } + OmicronZoneType::CockroachDb { address, .. } => { + NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: address.to_string(), + kind: NexusTypes::ServiceKind::Cockroach, } } } + } +} - Ok(services) +/// Describes a persistent ZFS dataset associated with an Omicron zone +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct OmicronZoneDataset { + pub pool_name: ZpoolName, +} + +impl From for sled_agent_client::types::OmicronZoneDataset { + fn from(local: OmicronZoneDataset) -> Self { + Self { + pool_name: sled_agent_client::types::ZpoolName::from_str( + &local.pool_name.to_string(), + ) + .unwrap(), + } } } -/// Used to request that the Sled initialize a single service. +/// Describes what kind of zone this is (i.e., what component is running in it) +/// as well as any type-specific configuration #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] -pub struct ServiceZoneService { - pub id: Uuid, - pub details: ServiceType, +#[serde(tag = "type", rename_all = "snake_case")] +pub enum OmicronZoneType { + BoundaryNtp { + address: SocketAddrV6, + ntp_servers: Vec, + dns_servers: Vec, + domain: Option, + /// The service vNIC providing outbound connectivity using OPTE. + nic: NetworkInterface, + /// The SNAT configuration for outbound connections. + snat_cfg: SourceNatConfig, + }, + + Clickhouse { + address: SocketAddrV6, + dataset: OmicronZoneDataset, + }, + + ClickhouseKeeper { + address: SocketAddrV6, + dataset: OmicronZoneDataset, + }, + CockroachDb { + address: SocketAddrV6, + dataset: OmicronZoneDataset, + }, + + Crucible { + address: SocketAddrV6, + dataset: OmicronZoneDataset, + }, + CruciblePantry { + address: SocketAddrV6, + }, + ExternalDns { + dataset: OmicronZoneDataset, + /// The address at which the external DNS server API is reachable. + http_address: SocketAddrV6, + /// The address at which the external DNS server is reachable. + dns_address: SocketAddr, + /// The service vNIC providing external connectivity using OPTE. + nic: NetworkInterface, + }, + InternalDns { + dataset: OmicronZoneDataset, + http_address: SocketAddrV6, + dns_address: SocketAddrV6, + /// The addresses in the global zone which should be created + /// + /// For the DNS service, which exists outside the sleds's typical subnet + /// - adding an address in the GZ is necessary to allow inter-zone + /// traffic routing. + gz_address: Ipv6Addr, + + /// The address is also identified with an auxiliary bit of information + /// to ensure that the created global zone address can have a unique + /// name. + gz_address_index: u32, + }, + InternalNtp { + address: SocketAddrV6, + ntp_servers: Vec, + dns_servers: Vec, + domain: Option, + }, + Nexus { + /// The address at which the internal nexus server is reachable. + internal_address: SocketAddrV6, + /// The address at which the external nexus server is reachable. + external_ip: IpAddr, + /// The service vNIC providing external connectivity using OPTE. 
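The `#[serde(tag = "type", rename_all = "snake_case")]` attribute above is exactly what produces the `oneOf`-with-required-`"type"` encoding seen in the JSON schemas earlier in this patch. In miniature, with toy variants and `String` in place of the real field types (assumes serde derive and serde_json):

    use serde::{Deserialize, Serialize};

    // Internally tagged: the variant name travels as a "type" field,
    // snake_cased to match the schema enums.
    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    #[serde(tag = "type", rename_all = "snake_case")]
    enum ZoneKind {
        CruciblePantry { address: String },
        Oximeter { address: String },
    }

    fn main() {
        let json = r#"{ "type": "crucible_pantry", "address": "[::1]:17000" }"#;
        let parsed: ZoneKind = serde_json::from_str(json).unwrap();
        assert_eq!(
            parsed,
            ZoneKind::CruciblePantry { address: "[::1]:17000".into() }
        );
    }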
+ nic: NetworkInterface, + /// Whether Nexus's external endpoint should use TLS + external_tls: bool, + /// External DNS servers Nexus can use to resolve external hosts. + external_dns_servers: Vec, + }, + Oximeter { + address: SocketAddrV6, + }, } -impl TryFrom - for sled_agent_client::types::ServiceZoneService -{ - type Error = AutonomousServiceOnlyError; +impl OmicronZoneType { + /// Returns a canonical string identifying the type of zone this is + /// + /// This is used to construct zone names, SMF service names, etc. + pub fn zone_type_str(&self) -> String { + match self { + OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } => ZoneType::Ntp, + + OmicronZoneType::Clickhouse { .. } => ZoneType::Clickhouse, + OmicronZoneType::ClickhouseKeeper { .. } => { + ZoneType::ClickhouseKeeper + } + OmicronZoneType::CockroachDb { .. } => ZoneType::CockroachDb, + OmicronZoneType::Crucible { .. } => ZoneType::Crucible, + OmicronZoneType::CruciblePantry { .. } => ZoneType::CruciblePantry, + OmicronZoneType::ExternalDns { .. } => ZoneType::ExternalDns, + OmicronZoneType::InternalDns { .. } => ZoneType::InternalDns, + OmicronZoneType::Nexus { .. } => ZoneType::Nexus, + OmicronZoneType::Oximeter { .. } => ZoneType::Oximeter, + } + .to_string() + } + + /// If this kind of zone has an associated dataset, returns the dataset's + /// name. Othrwise, returns `None`. + pub fn dataset_name(&self) -> Option { + self.dataset_name_and_address().map(|d| d.0) + } + + /// If this kind of zone has an associated dataset, return the dataset's + /// name and the associated "service address". Otherwise, returns `None`. + pub fn dataset_name_and_address( + &self, + ) -> Option<(DatasetName, SocketAddrV6)> { + let (dataset, dataset_kind, address) = match self { + OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } + | OmicronZoneType::Nexus { .. } + | OmicronZoneType::Oximeter { .. } + | OmicronZoneType::CruciblePantry { .. } => None, + OmicronZoneType::Clickhouse { dataset, address, .. } => { + Some((dataset, DatasetKind::Clickhouse, address)) + } + OmicronZoneType::ClickhouseKeeper { dataset, address, .. } => { + Some((dataset, DatasetKind::ClickhouseKeeper, address)) + } + OmicronZoneType::CockroachDb { dataset, address, .. } => { + Some((dataset, DatasetKind::CockroachDb, address)) + } + OmicronZoneType::Crucible { dataset, address, .. } => { + Some((dataset, DatasetKind::Crucible, address)) + } + OmicronZoneType::ExternalDns { dataset, http_address, .. } => { + Some((dataset, DatasetKind::ExternalDns, http_address)) + } + OmicronZoneType::InternalDns { dataset, http_address, .. } => { + Some((dataset, DatasetKind::InternalDns, http_address)) + } + }?; - fn try_from(s: ServiceZoneService) -> Result { - let details = s.details.try_into()?; - Ok(Self { id: s.id, details }) + Some(( + DatasetName::new(dataset.pool_name.clone(), dataset_kind), + *address, + )) } } -/// Used to request that the Sled initialize multiple services. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct ServiceEnsureBody { - pub services: Vec, +impl crate::smf_helper::Service for OmicronZoneType { + fn service_name(&self) -> String { + // For historical reasons, crucible-pantry is the only zone type whose + // SMF service does not match the canonical name that we use for the + // zone. + match self { + OmicronZoneType::CruciblePantry { .. 
} => { + "crucible/pantry".to_owned() + } + _ => self.zone_type_str(), + } + } + fn smf_name(&self) -> String { + format!("svc:/oxide/{}", self.service_name()) + } + fn should_import(&self) -> bool { + true + } +} + +impl From for sled_agent_client::types::OmicronZoneType { + fn from(local: OmicronZoneType) -> Self { + use sled_agent_client::types::OmicronZoneType as Other; + match local { + OmicronZoneType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + } => Other::BoundaryNtp { + address: address.to_string(), + dns_servers, + domain, + ntp_servers, + snat_cfg: snat_cfg.into(), + nic: nic.into(), + }, + OmicronZoneType::Clickhouse { address, dataset } => { + Other::Clickhouse { + address: address.to_string(), + dataset: dataset.into(), + } + } + OmicronZoneType::ClickhouseKeeper { address, dataset } => { + Other::ClickhouseKeeper { + address: address.to_string(), + dataset: dataset.into(), + } + } + OmicronZoneType::CockroachDb { address, dataset } => { + Other::CockroachDb { + address: address.to_string(), + dataset: dataset.into(), + } + } + OmicronZoneType::Crucible { address, dataset } => Other::Crucible { + address: address.to_string(), + dataset: dataset.into(), + }, + OmicronZoneType::CruciblePantry { address } => { + Other::CruciblePantry { address: address.to_string() } + } + OmicronZoneType::ExternalDns { + dataset, + http_address, + dns_address, + nic, + } => Other::ExternalDns { + dataset: dataset.into(), + http_address: http_address.to_string(), + dns_address: dns_address.to_string(), + nic: nic.into(), + }, + OmicronZoneType::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + } => Other::InternalDns { + dataset: dataset.into(), + http_address: http_address.to_string(), + dns_address: dns_address.to_string(), + gz_address, + gz_address_index, + }, + OmicronZoneType::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + } => Other::InternalNtp { + address: address.to_string(), + ntp_servers, + dns_servers, + domain, + }, + OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + } => Other::Nexus { + external_dns_servers, + external_ip, + external_tls, + internal_address: internal_address.to_string(), + nic: nic.into(), + }, + OmicronZoneType::Oximeter { address } => { + Other::Oximeter { address: address.to_string() } + } + } + } } #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 980f5b6ebd..441c7fd842 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -5,10 +5,7 @@ //! Plan generation for "where should services be initialized". use crate::bootstrap::params::StartSledAgentRequest; -use crate::params::{ - DatasetRequest, ServiceType, ServiceZoneRequest, ServiceZoneService, - ZoneType, -}; +use crate::params::{OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType}; use crate::rack_setup::config::SetupServiceConfig as Config; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; @@ -97,20 +94,20 @@ pub enum PlanError { #[error("Ran out of sleds / U2 storage pools")] NotEnoughSleds, + + #[error("Found only v1 service plan")] + FoundV1, } -#[derive( - Clone, Debug, Default, Deserialize, Serialize, PartialEq, JsonSchema, -)] -pub struct SledRequest { - /// Services to be instantiated. 
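In the `From` conversion above, every socket address crosses into the generated client types as a plain string. The round-trip is lossless for `SocketAddrV6`, which is what keeps the stringly-typed wire format safe; the address below is illustrative:

    use std::net::SocketAddrV6;

    fn main() {
        // Underlay-style address; any valid SocketAddrV6 behaves the same.
        let addr: SocketAddrV6 = "[fd00:1122:3344:101::1]:32221".parse().unwrap();
        let wire = addr.to_string();
        assert_eq!(wire.parse::<SocketAddrV6>().unwrap(), addr);
    }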
- #[serde(default, rename = "service")] - pub services: Vec, +#[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)] +pub struct SledConfig { + /// zones configured for this sled + pub zones: Vec, } #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct Plan { - pub services: HashMap, + pub services: HashMap, pub dns_config: DnsConfigParams, } @@ -120,7 +117,8 @@ impl Ledgerable for Plan { } fn generation_bump(&mut self) {} } -const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan.json"; +const RSS_SERVICE_PLAN_V1_FILENAME: &str = "rss-service-plan.json"; +const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v2.json"; impl Plan { pub async fn load( @@ -142,11 +140,60 @@ impl Plan { if let Some(ledger) = ledger { info!(log, "RSS plan already created, loading from file"); Ok(Some(ledger.data().clone())) + } else if Self::has_v1(storage_manager).await.map_err(|err| { + PlanError::Io { + message: String::from("looking for v1 RSS plan"), + err, + } + })? { + // If we found no current-version service plan, but we _do_ find + // a v1 plan present, bail out. We do not expect to ever see this + // in practice because that would indicate that: + // + // - We ran RSS previously on this same system using an older + // version of the software that generates v1 service plans and it + // got far enough through RSS to have written the v1 service plan. + // - That means it must have finished initializing all sled agents, + // including itself, causing it to record a + // `StartSledAgentRequest`s in its ledger -- while still running + // the older RSS. + // - But we're currently running software that knows about v2 + // service plans. Thus, this process started some time after that + // ledger was written. + // - But the bootstrap agent refuses to execute RSS if it has a + // local `StartSledAgentRequest` ledgered. So we shouldn't get + // here if all of the above happened. + // + // This sounds like a complicated set of assumptions. If we got + // this wrong, we'll fail spuriously here and we'll have to figure + // out what happened. But the alternative is doing extra work to + // support a condition that we do not believe can ever happen in any + // system. + Err(PlanError::FoundV1) } else { Ok(None) } } + async fn has_v1( + storage_manager: &StorageHandle, + ) -> Result { + let paths = storage_manager + .get_latest_resources() + .await + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(RSS_SERVICE_PLAN_V1_FILENAME)); + + for p in paths { + if p.try_exists()? { + return Ok(true); + } + } + + Ok(false) + } + async fn is_sled_scrimlet( log: &Logger, address: SocketAddrV6, @@ -235,41 +282,13 @@ impl Plan { Ok(u2_zpools) } - pub async fn create( - log: &Logger, + pub fn create_transient( config: &Config, - storage_manager: &StorageHandle, - sleds: &HashMap, + mut sled_info: Vec, ) -> Result { let mut dns_builder = internal_dns::DnsConfigBuilder::new(); let mut svc_port_builder = ServicePortBuilder::new(config); - // Load the information we need about each Sled to be able to allocate - // components on it. 
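`has_v1` above reduces to a filesystem probe: look for the old plan's filename in each M.2 config dataset and report any hit, surfacing I/O errors rather than swallowing them. Stripped of the storage-manager plumbing, the core is roughly this (the directory list is a stand-in for the mountpoint enumeration):

    use camino::Utf8PathBuf;

    fn any_v1_plan(config_dirs: &[Utf8PathBuf]) -> std::io::Result<bool> {
        for dir in config_dirs {
            // try_exists distinguishes "definitely absent" from I/O errors,
            // which the caller maps onto PlanError::Io.
            if dir.join("rss-service-plan.json").try_exists()? {
                return Ok(true);
            }
        }
        Ok(false)
    }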
- let mut sled_info = { - let result: Result, PlanError> = - futures::future::try_join_all(sleds.values().map( - |sled_request| async { - let subnet = sled_request.body.subnet; - let sled_address = get_sled_address(subnet); - let u2_zpools = - Self::get_u2_zpools_from_sled(log, sled_address) - .await?; - let is_scrimlet = - Self::is_sled_scrimlet(log, sled_address).await?; - Ok(SledInfo::new( - sled_request.body.id, - subnet, - sled_address, - u2_zpools, - is_scrimlet, - )) - }, - )) - .await; - result? - }; - // Scrimlets get DNS records for running Dendrite. let scrimlets: Vec<_> = sled_info.iter().filter(|s| s.is_scrimlet).collect(); @@ -348,24 +367,18 @@ impl Plan { let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::InternalDns)?; - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::InternalDns, - addresses: vec![ip], - dataset: Some(DatasetRequest { - id, - name: dataset_name, - service_address: http_address, - }), - services: vec![ServiceZoneService { - id, - details: ServiceType::InternalDns { - http_address, - dns_address, - gz_address: dns_subnet.gz_address().ip(), - gz_address_index: i.try_into().expect("Giant indices?"), + underlay_address: ip, + zone_type: OmicronZoneType::InternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), }, - }], + http_address, + dns_address, + gz_address: dns_subnet.gz_address().ip(), + gz_address_index: i.try_into().expect("Giant indices?"), + }, }); } @@ -386,19 +399,15 @@ impl Plan { .unwrap(); let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::CockroachDb)?; - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::CockroachDb, - addresses: vec![ip], - dataset: Some(DatasetRequest { - id, - name: dataset_name, - service_address: address, - }), - services: vec![ServiceZoneService { - id, - details: ServiceType::CockroachDb { address }, - }], + underlay_address: ip, + zone_type: OmicronZoneType::CockroachDb { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + address, + }, }); } @@ -433,23 +442,17 @@ impl Plan { let dataset_kind = DatasetKind::ExternalDns; let dataset_name = sled.alloc_from_u2_zpool(dataset_kind)?; - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::ExternalDns, - addresses: vec![*http_address.ip()], - dataset: Some(DatasetRequest { - id, - name: dataset_name, - service_address: http_address, - }), - services: vec![ServiceZoneService { - id, - details: ServiceType::ExternalDns { - http_address, - dns_address, - nic, + underlay_address: *http_address.ip(), + zone_type: OmicronZoneType::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), }, - }], + http_address, + dns_address, + nic, + }, }); } @@ -471,33 +474,28 @@ impl Plan { ) .unwrap(); let (nic, external_ip) = svc_port_builder.next_nexus(id)?; - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::Nexus, - addresses: vec![address], - dataset: None, - services: vec![ServiceZoneService { - id, - details: ServiceType::Nexus { - internal_address: SocketAddrV6::new( - address, - omicron_common::address::NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_ip, - nic, - // Tell Nexus to use TLS if and only if the caller - // provided TLS certificates. 
This effectively - // determines the status of TLS for the lifetime of - // the rack. In production-like deployments, we'd - // always expect TLS to be enabled. It's only in - // development that it might not be. - external_tls: !config.external_certificates.is_empty(), - external_dns_servers: config.dns_servers.clone(), - }, - }], - }) + underlay_address: address, + zone_type: OmicronZoneType::Nexus { + internal_address: SocketAddrV6::new( + address, + omicron_common::address::NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_ip, + nic, + // Tell Nexus to use TLS if and only if the caller + // provided TLS certificates. This effectively + // determines the status of TLS for the lifetime of + // the rack. In production-like deployments, we'd + // always expect TLS to be enabled. It's only in + // development that it might not be. + external_tls: !config.external_certificates.is_empty(), + external_dns_servers: config.dns_servers.clone(), + }, + }); } // Provision Oximeter zones, continuing to stripe across sleds. @@ -518,22 +516,17 @@ impl Plan { omicron_common::address::OXIMETER_PORT, ) .unwrap(); - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::Oximeter, - addresses: vec![address], - dataset: None, - services: vec![ServiceZoneService { - id, - details: ServiceType::Oximeter { - address: SocketAddrV6::new( - address, - omicron_common::address::OXIMETER_PORT, - 0, - 0, - ), - }, - }], + underlay_address: address, + zone_type: OmicronZoneType::Oximeter { + address: SocketAddrV6::new( + address, + omicron_common::address::OXIMETER_PORT, + 0, + 0, + ), + }, }) } @@ -555,19 +548,15 @@ impl Plan { .unwrap(); let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::Clickhouse)?; - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::Clickhouse, - addresses: vec![ip], - dataset: Some(DatasetRequest { - id, - name: dataset_name, - service_address: address, - }), - services: vec![ServiceZoneService { - id, - details: ServiceType::Clickhouse { address }, - }], + underlay_address: ip, + zone_type: OmicronZoneType::Clickhouse { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + }, }); } @@ -595,19 +584,15 @@ impl Plan { .unwrap(); let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::ClickhouseKeeper)?; - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::ClickhouseKeeper, - addresses: vec![ip], - dataset: Some(DatasetRequest { - id, - name: dataset_name, - service_address: address, - }), - services: vec![ServiceZoneService { - id, - details: ServiceType::ClickhouseKeeper { address }, - }], + underlay_address: ip, + zone_type: OmicronZoneType::ClickhouseKeeper { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + }, }); } @@ -626,18 +611,13 @@ impl Plan { dns_builder .service_backend_zone(ServiceName::CruciblePantry, &zone, port) .unwrap(); - sled.request.services.push(ServiceZoneRequest { + sled.request.zones.push(OmicronZoneConfig { id, - zone_type: ZoneType::CruciblePantry, - addresses: vec![address], - dataset: None, - services: vec![ServiceZoneService { - id, - details: ServiceType::CruciblePantry { - address: SocketAddrV6::new(address, port, 0, 0), - }, - }], - }) + underlay_address: address, + zone_type: OmicronZoneType::CruciblePantry { + address: SocketAddrV6::new(address, port, 0, 0), + }, + }); } // 
Provision a Crucible zone on every zpool on every Sled.
@@ -657,22 +637,13 @@ impl Plan {
                 )
                 .unwrap();
 
-            sled.request.services.push(ServiceZoneRequest {
+            sled.request.zones.push(OmicronZoneConfig {
                 id,
-                zone_type: ZoneType::Crucible,
-                addresses: vec![ip],
-                dataset: Some(DatasetRequest {
-                    id,
-                    name: DatasetName::new(
-                        pool.clone(),
-                        DatasetKind::Crucible,
-                    ),
-                    service_address: address,
-                }),
-                services: vec![ServiceZoneService {
-                    id,
-                    details: ServiceType::Crucible { address },
-                }],
+                underlay_address: ip,
+                zone_type: OmicronZoneType::Crucible {
+                    address,
+                    dataset: OmicronZoneDataset { pool_name: pool.clone() },
+                },
             });
         }
     }
@@ -685,47 +656,40 @@ impl Plan {
             let id = Uuid::new_v4();
             let address = sled.addr_alloc.next().expect("Not enough addrs");
             let zone = dns_builder.host_zone(id, address).unwrap();
+            let ntp_address = SocketAddrV6::new(address, NTP_PORT, 0, 0);
 
-            let (services, svcname) = if idx < BOUNDARY_NTP_COUNT {
+            let (zone_type, svcname) = if idx < BOUNDARY_NTP_COUNT {
                 boundary_ntp_servers.push(format!("{}.host.{}", id, DNS_ZONE));
                 let (nic, snat_cfg) = svc_port_builder.next_snat(id)?;
                 (
-                    vec![ServiceZoneService {
-                        id,
-                        details: ServiceType::BoundaryNtp {
-                            address: SocketAddrV6::new(address, NTP_PORT, 0, 0),
-                            ntp_servers: config.ntp_servers.clone(),
-                            dns_servers: config.dns_servers.clone(),
-                            domain: None,
-                            nic,
-                            snat_cfg,
-                        },
-                    }],
+                    OmicronZoneType::BoundaryNtp {
+                        address: ntp_address,
+                        ntp_servers: config.ntp_servers.clone(),
+                        dns_servers: config.dns_servers.clone(),
+                        domain: None,
+                        nic,
+                        snat_cfg,
+                    },
                     ServiceName::BoundaryNtp,
                 )
             } else {
                 (
-                    vec![ServiceZoneService {
-                        id,
-                        details: ServiceType::InternalNtp {
-                            address: SocketAddrV6::new(address, NTP_PORT, 0, 0),
-                            ntp_servers: boundary_ntp_servers.clone(),
-                            dns_servers: rack_dns_servers.clone(),
-                            domain: None,
-                        },
-                    }],
+                    OmicronZoneType::InternalNtp {
+                        address: ntp_address,
+                        ntp_servers: boundary_ntp_servers.clone(),
+                        dns_servers: rack_dns_servers.clone(),
+                        domain: None,
+                    },
                     ServiceName::InternalNtp,
                 )
             };
             dns_builder.service_backend_zone(svcname, &zone, NTP_PORT).unwrap();
 
-            sled.request.services.push(ServiceZoneRequest {
+            sled.request.zones.push(OmicronZoneConfig {
                 id,
-                zone_type: ZoneType::Ntp,
-                addresses: vec![address],
-                dataset: None,
-                services,
+                underlay_address: address,
+                zone_type,
             });
         }
 
@@ -735,7 +699,42 @@ impl Plan {
             .collect();
         let dns_config = dns_builder.build();
 
-        let plan = Self { services, dns_config };
+        Ok(Self { services, dns_config })
+    }
+
+    pub async fn create(
+        log: &Logger,
+        config: &Config,
+        storage_manager: &StorageHandle,
+        sleds: &HashMap<SocketAddrV6, StartSledAgentRequest>,
+    ) -> Result<Self, PlanError> {
+        // Load the information we need about each Sled to be able to allocate
+        // components on it.
+        let sled_info = {
+            let result: Result<Vec<SledInfo>, PlanError> =
+                futures::future::try_join_all(sleds.values().map(
+                    |sled_request| async {
+                        let subnet = sled_request.body.subnet;
+                        let sled_address = get_sled_address(subnet);
+                        let u2_zpools =
+                            Self::get_u2_zpools_from_sled(log, sled_address)
+                                .await?;
+                        let is_scrimlet =
+                            Self::is_sled_scrimlet(log, sled_address).await?;
+                        Ok(SledInfo::new(
+                            sled_request.body.id,
+                            subnet,
+                            sled_address,
+                            u2_zpools,
+                            is_scrimlet,
+                        ))
+                    },
+                ))
+                .await;
            result?
        };

        let plan = Self::create_transient(config, sled_info)?;

        // Once we've constructed a plan, write it down to durable storage.
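        // (The ledger paths below live in the CONFIG_DATASET on each M.2
        // device -- the same place `has_v1` looks for the old plan -- so a
        // restarted RSS reloads this plan rather than regenerating it.)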
        let paths: Vec<Utf8PathBuf> = storage_manager
@@ -773,13 +772,13 @@ impl AddressBumpAllocator {
 }
 
 /// Wraps up the information used to allocate components to a Sled
-struct SledInfo {
+pub struct SledInfo {
     /// unique id for the sled agent
-    sled_id: Uuid,
+    pub sled_id: Uuid,
     /// the sled's unique IPv6 subnet
     subnet: Ipv6Subnet<SLED_PREFIX>,
     /// the address of the Sled Agent on the sled's subnet
-    sled_address: SocketAddrV6,
+    pub sled_address: SocketAddrV6,
     /// the list of zpools on the Sled
     u2_zpools: Vec<ZpoolName>,
     /// spreads components across a Sled's zpools
@@ -789,12 +788,12 @@ struct SledInfo {
     is_scrimlet: bool,
     /// allocator for addresses in this Sled's subnet
     addr_alloc: AddressBumpAllocator,
-    /// under-construction list of services being deployed to a Sled
-    request: SledRequest,
+    /// under-construction list of Omicron zones being deployed to a Sled
+    request: SledConfig,
 }
 
 impl SledInfo {
-    fn new(
+    pub fn new(
         sled_id: Uuid,
         subnet: Ipv6Subnet<SLED_PREFIX>,
         sled_address: SocketAddrV6,
@@ -1209,10 +1208,10 @@ mod tests {
     }
 
     #[test]
-    fn test_rss_service_plan_schema() {
+    fn test_rss_service_plan_v2_schema() {
         let schema = schemars::schema_for!(Plan);
         expectorate::assert_contents(
-            "../schema/rss-service-plan.json",
+            "../schema/rss-service-plan-v2.json",
             &serde_json::to_string_pretty(&schema).unwrap(),
         );
     }
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index 0b1eadf464..8038658fb1 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -11,15 +11,25 @@
 //! - DNS records for those services
 //! - Handoff to Nexus, for control of Control Plane management
 //!
-//! # Phases and Configuration Files
+//! # Phases, state files, and restart behavior
 //!
-//! Rack setup occurs in distinct phases which are denoted by the prescence of
-//! configuration files.
+//! Rack setup occurs in distinct phases that are denoted by the presence of
+//! state files that get generated as RSS executes:
 //!
 //! - /pool/int/UUID/config/rss-sled-plan.json (Sled Plan)
-//! - /pool/int/UUID/config/rss-service-plan.json (Service Plan)
+//! - /pool/int/UUID/config/rss-service-plan-v2.json (Service Plan)
 //! - /pool/int/UUID/config/rss-plan-completed.marker (Plan Execution Complete)
 //!
+//! These phases are described below. As each phase completes, a corresponding
+//! state file is written. This mechanism is designed so that if RSS restarts
+//! (e.g., after a crash) then it will resume execution using the same plans.
+//!
+//! The service plan file has "-v2" in the filename because its structure
+//! changed in omicron#4466. It is possible that on startup, RSS finds an
+//! older-form service plan. In that case, it fails altogether. We do not
+//! expect this condition to happen in practice. See the implementation for
+//! details.
+//!
 //! ## Sled Plan
 //!
 //! 
RSS should start as a service executing on a Sidecar-attached Gimlet @@ -65,8 +75,8 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::{d2n_params, ConvertInto}; use crate::params::{ - AutonomousServiceOnlyError, ServiceType, ServiceZoneRequest, - ServiceZoneService, TimeSync, ZoneType, + OmicronZoneType, OmicronZonesConfig, TimeSync, + OMICRON_ZONES_CONFIG_INITIAL_GENERATION, }; use crate::rack_setup::plan::service::{ Plan as ServicePlan, PlanError as ServicePlanError, @@ -83,6 +93,7 @@ use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, }; use omicron_common::address::get_sled_address; +use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::ExternalPortDiscovery; use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, @@ -257,45 +268,75 @@ impl ServiceInner { ServiceInner { log } } - async fn initialize_services_on_sled( + /// Requests that the specified sled configure zones as described by + /// `zones_config` + /// + /// This function succeeds even if the sled fails to apply the configuration + /// if the reason is that the sled is already running a newer configuration. + /// This might sound oddly specific but it's what our sole caller wants. + /// In particular, the caller is going to call this function a few times + /// with successive generation numbers. If we crash and go through the + /// process again, we might run into this case, and it's simplest to just + /// ignore it and proceed. + async fn initialize_zones_on_sled( &self, sled_address: SocketAddrV6, - services: &Vec, + zones_config: &OmicronZonesConfig, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; + let log = self.log.new(o!("sled_address" => sled_address.to_string())); let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address.to_string())), + log.clone(), ); - let services = services - .iter() - .map(|s| s.clone().try_into()) - .collect::, AutonomousServiceOnlyError>>() - .map_err(|err| { - SetupServiceError::SledInitialization(err.to_string()) - })?; - - info!(self.log, "sending service requests..."); let services_put = || async { - info!(self.log, "initializing sled services: {:?}", services); - client - .services_put(&SledAgentTypes::ServiceEnsureBody { - services: services.clone(), - }) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>>(()) + info!( + log, + "attempting to set up sled's Omicron zones: {:?}", zones_config + ); + let result = + client.omicron_zones_put(&zones_config.clone().into()).await; + let Err(error) = result else { + return Ok::< + (), + BackoffError>, + >(()); + }; + + if let sled_agent_client::Error::ErrorResponse(response) = &error { + if response.status() == http::StatusCode::CONFLICT { + warn!( + log, + "ignoring attempt to initialize zones because \ + the server seems to be newer"; + "attempted_generation" => + i64::from(&zones_config.generation), + "req_id" => &response.request_id, + "server_message" => &response.message, + ); + + // If we attempt to initialize zones at generation X, and + // the server refuses because it's at some generation newer + // than X, then we treat that as success. See the doc + // comment on this function. 
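+                    // (That "newer generation" signal arrives as an HTTP
+                    // CONFLICT: sled agent rejects requests whose generation
+                    // is older than its ledgered one -- see
+                    // `Error::RequestedConfigOutdated` in services.rs.)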
+ return Ok(()); + } + } + + // TODO Many other codes here should not be retried. See + // omicron#4578. + return Err(BackoffError::transient(error)); }; let log_failure = |error, delay| { warn!( - self.log, - "failed to initialize services"; + log, + "failed to initialize Omicron zones"; "error" => ?error, "retry_after" => ?delay, ); @@ -310,41 +351,26 @@ impl ServiceInner { Ok(()) } - // Ensure that all services of a particular type are running. + // Ensure that all services for a particular version are running. // // This is useful in a rack-setup context, where initial boot ordering // can matter for first-time-setup. // // Note that after first-time setup, the initialization order of // services should not matter. - async fn ensure_all_services_of_type( + // + // Further, it's possible that the target sled is already running a newer + // version. That's not an error here. + async fn ensure_zone_config_at_least( &self, - service_plan: &ServicePlan, - zone_types: &HashSet, + configs: &HashMap, ) -> Result<(), SetupServiceError> { - futures::future::join_all(service_plan.services.iter().map( - |(sled_address, services_request)| async move { - let services: Vec<_> = services_request - .services - .iter() - .filter_map(|service| { - if zone_types.contains(&service.zone_type) { - Some(service.clone()) - } else { - None - } - }) - .collect(); - if !services.is_empty() { - self.initialize_services_on_sled(*sled_address, &services) - .await?; - } - Ok(()) + cancel_safe_futures::future::join_all_then_try(configs.iter().map( + |(sled_address, zones_config)| async move { + self.initialize_zones_on_sled(*sled_address, zones_config).await }, )) - .await - .into_iter() - .collect::>()?; + .await?; Ok(()) } @@ -360,17 +386,15 @@ impl ServiceInner { let dns_server_ips = // iterate sleds service_plan.services.iter().filter_map( - |(_, services_request)| { - // iterate services for this sled - let dns_addrs: Vec = services_request - .services + |(_, sled_config)| { + // iterate zones for this sled + let dns_addrs: Vec = sled_config + .zones .iter() - .filter_map(|service| { - match &service.services[0] { - ServiceZoneService { - details: ServiceType::InternalDns { http_address, .. }, - .. - } => { + .filter_map(|zone_config| { + match &zone_config.zone_type { + OmicronZoneType::InternalDns { http_address, .. } + => { Some(*http_address) }, _ => None, @@ -546,25 +570,25 @@ impl ServiceInner { // a format which can be processed by Nexus. 
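        // (Concretely: the loops below emit one `ServicePutRequest` per zone
        // via `to_nexus_service_req()`, plus one `DatasetCreateRequest` for
        // each zone that reports a dataset via `dataset_name_and_address()`.)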
        let mut services: Vec<NexusTypes::ServicePutRequest> = vec![];
        let mut datasets: Vec<NexusTypes::DatasetCreateRequest> = vec![];
-        for (addr, service_request) in service_plan.services.iter() {
+        for (addr, sled_config) in service_plan.services.iter() {
             let sled_id = *id_map
                 .get(addr)
                 .expect("Sled address in service plan, but not sled plan");
 
-            for zone in &service_request.services {
-                services.extend(zone.into_nexus_service_req(sled_id).map_err(
-                    |err| SetupServiceError::BadConfig(err.to_string()),
-                )?);
+            for zone in &sled_config.zones {
+                services.push(zone.to_nexus_service_req(sled_id));
             }
 
-            for service in service_request.services.iter() {
-                if let Some(dataset) = &service.dataset {
+            for zone in &sled_config.zones {
+                if let Some((dataset_name, dataset_address)) =
+                    zone.dataset_name_and_address()
+                {
                     datasets.push(NexusTypes::DatasetCreateRequest {
-                        zpool_id: dataset.name.pool().id(),
-                        dataset_id: dataset.id,
+                        zpool_id: dataset_name.pool().id(),
+                        dataset_id: zone.id,
                         request: NexusTypes::DatasetPutRequest {
-                            address: dataset.service_address.to_string(),
-                            kind: dataset.name.dataset().clone().convert(),
+                            address: dataset_address.to_string(),
+                            kind: dataset_name.dataset().clone().convert(),
                         },
                     })
                 }
@@ -700,20 +724,22 @@ impl ServiceInner {
     ) -> Result<(), SetupServiceError> {
         // Now that datasets and zones have started for CockroachDB,
         // perform one-time initialization of the cluster.
-        let sled_address =
-            service_plan
-                .services
-                .iter()
-                .find_map(|(sled_address, sled_request)| {
-                    if sled_request.services.iter().any(|service| {
-                        service.zone_type == ZoneType::CockroachDb
-                    }) {
-                        Some(sled_address)
-                    } else {
-                        None
-                    }
-                })
-                .expect("Should not create service plans without CockroachDb");
+        let sled_address = service_plan
+            .services
+            .iter()
+            .find_map(|(sled_address, sled_config)| {
+                if sled_config.zones.iter().any(|zone_config| {
+                    matches!(
+                        &zone_config.zone_type,
+                        OmicronZoneType::CockroachDb { .. }
+                    )
+                }) {
+                    Some(sled_address)
+                } else {
+                    None
+                }
+            })
+            .expect("Should not create service plans without CockroachDb");
         let dur = std::time::Duration::from_secs(60);
         let client = reqwest::ClientBuilder::new()
             .connect_timeout(dur)
@@ -753,8 +779,8 @@ impl ServiceInner {
     // time, it creates an allocation plan to provision subnets to an initial
     // set of sleds.
     //
-    // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making
-    // requests to the sleds enumerated within the "allocation plan".
+    // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan,
+    // making requests to the sleds enumerated within the "allocation plan".
     //
     // 3. SERVICE ALLOCATION PLAN CREATION. Now that Sled Agents are executing
     // on their respective subnets, they can be queried to create an
@@ -765,7 +791,8 @@ impl ServiceInner {
     //
     // 5. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the
    // rack, a marker file is created at "rss_completed_marker_path()". This
-    // indicates that the plan executed successfully, and no work remains.
+    // indicates that the plan executed successfully, and the only work
+    // remaining is to hand off to Nexus.
     async fn run(
         &self,
         config: &Config,
@@ -946,11 +973,49 @@ impl ServiceInner {
             .await?
         };
 
+        // The service plan describes all the zones that we will eventually
+        // deploy on each sled. But we cannot currently just deploy them all
+        // concurrently. We'll do it in a few stages, each corresponding to a
+        // version of each sled's configuration.
+        //
+        // - version 1: no services running
+        //   (We don't have to do anything for this.
But we do + // reserve this version number for "no services running" so + // that sled agents can begin with an initial, valid + // OmicronZonesConfig before they've got anything running.) + // - version 2: internal DNS only + // - version 3: internal DNS + NTP servers + // - version 4: internal DNS + NTP servers + CockroachDB + // - version 5: everything + // + // At each stage, we're specifying a complete configuration of what + // should be running on the sled -- including this version number. + // And Sled Agents will reject requests for versions older than the + // one they're currently running. Thus, the version number is a piece + // of global, distributed state. + // + // For now, we hardcode the requests we make to use specific version + // numbers. + let version1_nothing = + Generation::from(OMICRON_ZONES_CONFIG_INITIAL_GENERATION); + let version2_dns_only = version1_nothing.next(); + let version3_dns_and_ntp = version2_dns_only.next(); + let version4_cockroachdb = version3_dns_and_ntp.next(); + let version5_everything = version4_cockroachdb.next(); + // Set up internal DNS services first and write the initial // DNS configuration to the internal DNS servers. - let mut zone_types = HashSet::new(); - zone_types.insert(ZoneType::InternalDns); - self.ensure_all_services_of_type(&service_plan, &zone_types).await?; + let v1generator = OmicronZonesConfigGenerator::initial_version( + &service_plan, + version1_nothing, + ); + let v2generator = v1generator.new_version_with( + version2_dns_only, + &|zone_type: &OmicronZoneType| { + matches!(zone_type, OmicronZoneType::InternalDns { .. }) + }, + ); + self.ensure_zone_config_at_least(v2generator.sled_configs()).await?; self.initialize_internal_dns_records(&service_plan).await?; // Ask MGS in each switch zone which switch it is. @@ -959,10 +1024,17 @@ impl ServiceInner { .await; // Next start up the NTP services. - // Note we also specify internal DNS services again because it - // can ony be additive. - zone_types.insert(ZoneType::Ntp); - self.ensure_all_services_of_type(&service_plan, &zone_types).await?; + let v3generator = v2generator.new_version_with( + version3_dns_and_ntp, + &|zone_type: &OmicronZoneType| { + matches!( + zone_type, + OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } + ) + }, + ); + self.ensure_zone_config_at_least(v3generator.sled_configs()).await?; // Wait until time is synchronized on all sleds before proceeding. self.wait_for_timesync(&sled_addresses).await?; @@ -970,35 +1042,22 @@ impl ServiceInner { info!(self.log, "Finished setting up Internal DNS and NTP"); // Wait until Cockroach has been initialized before running Nexus. - zone_types.insert(ZoneType::CockroachDb); - self.ensure_all_services_of_type(&service_plan, &zone_types).await?; + let v4generator = v3generator.new_version_with( + version4_cockroachdb, + &|zone_type: &OmicronZoneType| { + matches!(zone_type, OmicronZoneType::CockroachDb { .. }) + }, + ); + self.ensure_zone_config_at_least(v4generator.sled_configs()).await?; // Now that datasets and zones have started for CockroachDB, // perform one-time initialization of the cluster. self.initialize_cockroach(&service_plan).await?; - // Issue service initialization requests. - futures::future::join_all(service_plan.services.iter().map( - |(sled_address, services_request)| async move { - // With the current implementation of "initialize_services_on_sled", - // we must provide the set of *all* services that should be - // executing on a sled. 
- // - // This means re-requesting the DNS and NTP services, even if - // they are already running - this is fine, however, as the - // receiving sled agent doesn't modify the already-running - // service. - self.initialize_services_on_sled( - *sled_address, - &services_request.services, - ) - .await?; - Ok(()) - }, - )) - .await - .into_iter() - .collect::, SetupServiceError>>()?; + // Issue the rest of the zone initialization requests. + let v5generator = + v4generator.new_version_with(version5_everything, &|_| true); + self.ensure_zone_config_at_least(v5generator.sled_configs()).await?; info!(self.log, "Finished setting up services"); @@ -1031,3 +1090,272 @@ impl ServiceInner { Ok(()) } } + +/// Facilitates creating a sequence of OmicronZonesConfig objects for each sled +/// in a service plan to enable phased rollout of services +/// +/// The service plan itself defines which zones should appear on every sled. +/// However, we want to deploy these zones in phases: first internal DNS, then +/// NTP, then CockroachDB, etc. This interface generates sled configs for each +/// phase and enforces that: +/// +/// - each version includes all zones deployed in the previous iteration +/// - each sled's version number increases with each iteration +/// +struct OmicronZonesConfigGenerator<'a> { + service_plan: &'a ServicePlan, + last_configs: HashMap, +} + +impl<'a> OmicronZonesConfigGenerator<'a> { + /// Make a set of sled configurations for an initial version where each sled + /// has nothing deployed on it + fn initial_version( + service_plan: &'a ServicePlan, + initial_version: Generation, + ) -> Self { + let last_configs = service_plan + .services + .keys() + .map(|sled_address| { + ( + *sled_address, + OmicronZonesConfig { + generation: initial_version, + zones: vec![], + }, + ) + }) + .collect(); + Self { service_plan, last_configs } + } + + /// Returns the set of sled configurations produced for this version + fn sled_configs(&self) -> &HashMap { + &self.last_configs + } + + /// Produces a new set of configs for each sled based on the current set of + /// configurations, adding zones from the service plan matching + /// `zone_filter`. 
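+    ///
+    /// A sketch of the intended call pattern, mirroring the tests below
+    /// (illustrative only):
+    ///
+    /// ```ignore
+    /// let v1 = OmicronZonesConfigGenerator::initial_version(&plan, gen);
+    /// let v2 = v1.new_version_with(gen.next(), &|zone_type| {
+    ///     matches!(zone_type, OmicronZoneType::InternalDns { .. })
+    /// });
+    /// let configs = v2.sled_configs();
+    /// ```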
+ /// + /// # Panics + /// + /// If `version` is not larger than the current version + fn new_version_with( + self, + version: Generation, + zone_filter: &(dyn Fn(&OmicronZoneType) -> bool + Send + Sync), + ) -> OmicronZonesConfigGenerator<'a> { + let last_configs = self + .service_plan + .services + .iter() + .map(|(sled_address, sled_config)| { + let mut zones = match self.last_configs.get(sled_address) { + Some(config) => { + assert!(version > config.generation); + config.zones.clone() + } + None => Vec::new(), + }; + + let zones_already = + zones.iter().map(|z| z.id).collect::>(); + zones.extend( + sled_config + .zones + .iter() + .filter(|z| { + !zones_already.contains(&z.id) + && zone_filter(&z.zone_type) + }) + .cloned(), + ); + + let config = OmicronZonesConfig { generation: version, zones }; + (*sled_address, config) + }) + .collect(); + Self { service_plan: self.service_plan, last_configs } + } +} + +#[cfg(test)] +mod test { + use super::OmicronZonesConfigGenerator; + use crate::{ + params::OmicronZoneType, + rack_setup::plan::service::{Plan as ServicePlan, SledInfo}, + }; + use illumos_utils::zpool::ZpoolName; + use omicron_common::{address::Ipv6Subnet, api::external::Generation}; + + fn make_test_service_plan() -> ServicePlan { + let rss_config = crate::bootstrap::params::test_config(); + let fake_sleds = vec![ + SledInfo::new( + "d4ba4bbe-8542-4907-bc8f-48df53eb5089".parse().unwrap(), + Ipv6Subnet::new("fd00:1122:3344:101::1".parse().unwrap()), + "[fd00:1122:3344:101::1]:80".parse().unwrap(), + vec![ + ZpoolName::new_internal( + "c5885278-0ae2-4f1e-9223-07f2ada818e1".parse().unwrap(), + ), + ZpoolName::new_internal( + "57465977-8275-43aa-a320-b6cd5cb20ca6".parse().unwrap(), + ), + ZpoolName::new_external( + "886f9fe7-bf70-4ddd-ae92-764dc3ed14ab".parse().unwrap(), + ), + ZpoolName::new_external( + "4c9061b1-345b-4985-8cbd-a2a899f15b68".parse().unwrap(), + ), + ZpoolName::new_external( + "b2bd488e-b187-42a0-b157-9ab0f70d91a8".parse().unwrap(), + ), + ], + true, + ), + SledInfo::new( + "b4359dea-665d-41ca-a681-f55912f2d5d0".parse().unwrap(), + Ipv6Subnet::new("fd00:1122:3344:102::1".parse().unwrap()), + "[fd00:1122:3344:102::1]:80".parse().unwrap(), + vec![ + ZpoolName::new_internal( + "34d6b5e5-a09f-4e96-a599-fa306ce6d983".parse().unwrap(), + ), + ZpoolName::new_internal( + "e9b8d1ea-da29-4b61-a493-c0ed319098da".parse().unwrap(), + ), + ZpoolName::new_external( + "37f8e903-2adb-4613-b78c-198122c289f0".parse().unwrap(), + ), + ZpoolName::new_external( + "b50f787c-97b3-4b91-a5bd-99d11fc86fb8".parse().unwrap(), + ), + ZpoolName::new_external( + "809e50c8-930e-413a-950c-69a540b688e2".parse().unwrap(), + ), + ], + true, + ), + ]; + let service_plan = + ServicePlan::create_transient(&rss_config, fake_sleds) + .expect("failed to create service plan"); + + service_plan + } + + #[test] + fn test_omicron_zone_configs() { + let service_plan = make_test_service_plan(); + + // Verify the initial state. + let g1 = Generation::new(); + let v1 = + OmicronZonesConfigGenerator::initial_version(&service_plan, g1); + assert_eq!( + service_plan.services.keys().len(), + v1.sled_configs().keys().len() + ); + for (_, configs) in v1.sled_configs() { + assert_eq!(configs.generation, g1); + assert!(configs.zones.is_empty()); + } + + // Verify that we can add a bunch of zones of a given type. + let g2 = g1.next(); + let v2 = v1.new_version_with(g2, &|zone_type| { + matches!(zone_type, OmicronZoneType::InternalDns { .. 
}) + }); + let mut v2_nfound = 0; + for (_, config) in v2.sled_configs() { + assert_eq!(config.generation, g2); + v2_nfound += config.zones.len(); + for z in &config.zones { + // The only zones we should find are the Internal DNS ones. + assert!(matches!( + &z.zone_type, + OmicronZoneType::InternalDns { .. } + )); + } + } + // There should have been at least one InternalDns zone. + assert!(v2_nfound > 0); + + // Try again to add zones of the same type. This should be a no-op. + let g3 = g2.next(); + let v3 = v2.new_version_with(g3, &|zone_type| { + matches!(zone_type, OmicronZoneType::InternalDns { .. }) + }); + let mut v3_nfound = 0; + for (_, config) in v3.sled_configs() { + assert_eq!(config.generation, g3); + v3_nfound += config.zones.len(); + for z in &config.zones { + // The only zones we should find are the Internal DNS ones. + assert!(matches!( + &z.zone_type, + OmicronZoneType::InternalDns { .. } + )); + } + } + assert_eq!(v2_nfound, v3_nfound); + + // Now try adding zones of a different type. We should still have all + // the Internal DNS ones, plus a few more. + let g4 = g3.next(); + let v4 = v3.new_version_with(g4, &|zone_type| { + matches!(zone_type, OmicronZoneType::Nexus { .. }) + }); + let mut v4_nfound_dns = 0; + let mut v4_nfound = 0; + for (_, config) in v4.sled_configs() { + assert_eq!(config.generation, g4); + v4_nfound += config.zones.len(); + for z in &config.zones { + match &z.zone_type { + OmicronZoneType::InternalDns { .. } => v4_nfound_dns += 1, + OmicronZoneType::Nexus { .. } => (), + _ => panic!("unexpectedly found a wrong zone type"), + } + } + } + assert_eq!(v4_nfound_dns, v3_nfound); + assert!(v4_nfound > v3_nfound); + + // Now try adding zones that match no filter. Again, this should be a + // no-op but we should still have all the same zones we had before. + let g5 = g4.next(); + let v5 = v4.new_version_with(g5, &|_| false); + let mut v5_nfound = 0; + for (_, config) in v5.sled_configs() { + assert_eq!(config.generation, g5); + v5_nfound += config.zones.len(); + for z in &config.zones { + assert!(matches!( + &z.zone_type, + OmicronZoneType::InternalDns { .. } + | OmicronZoneType::Nexus { .. } + )); + } + } + assert_eq!(v4_nfound, v5_nfound); + + // Finally, try adding the rest of the zones. + let g6 = g5.next(); + let v6 = v5.new_version_with(g6, &|_| true); + let mut v6_nfound = 0; + for (sled_address, config) in v6.sled_configs() { + assert_eq!(config.generation, g6); + v6_nfound += config.zones.len(); + assert_eq!( + config.zones.len(), + service_plan.services.get(sled_address).unwrap().zones.len() + ); + } + assert!(v6_nfound > v5_nfound); + } +} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 2caa640e22..88f79e7064 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -20,10 +20,10 @@ //! of what other services Nexus wants to have executing on the sled. //! //! To accomplish this, the following interfaces are exposed: -//! - [ServiceManager::ensure_all_services_persistent] exposes an API to request -//! a set of services that should persist beyond reboot. +//! - [ServiceManager::ensure_all_omicron_zones_persistent] exposes an API to +//! request a set of Omicron zones that should persist beyond reboot. //! - [ServiceManager::activate_switch] exposes an API to specifically enable -//! or disable (via [ServiceManager::deactivate_switch]) the switch zone. +//! or disable (via [ServiceManager::deactivate_switch]) the switch zone. 
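+//!
+//! A rough sketch of the ensure flow (types elided; illustrative only):
+//!
+//! ```ignore
+//! // Nexus (or RSS) hands the sled agent a complete, versioned zone set;
+//! // requests carrying a generation older than the ledgered one are
+//! // rejected (see `Error::RequestedConfigOutdated` below).
+//! let requested = OmicronZonesConfig { generation, zones };
+//! manager.ensure_all_omicron_zones_persistent(requested).await?;
+//! ```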
use crate::bootstrap::early_networking::{ EarlyNetworkSetup, EarlyNetworkSetupError, @@ -31,11 +31,11 @@ use crate::bootstrap::early_networking::{ use crate::bootstrap::BootstrapNetworking; use crate::config::SidecarRevision; use crate::params::{ - DendriteAsic, ServiceEnsureBody, ServiceType, ServiceZoneRequest, - ServiceZoneService, TimeSync, ZoneBundleCause, ZoneBundleMetadata, - ZoneType, + DendriteAsic, OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, + TimeSync, ZoneBundleCause, ZoneBundleMetadata, ZoneType, }; use crate::profile::*; +use crate::services_migration::{AllZoneRequests, SERVICES_LEDGER_FILENAME}; use crate::smf_helper::Service; use crate::smf_helper::SmfHelper; use crate::zone_bundle::BundleError; @@ -89,13 +89,14 @@ use omicron_common::nexus_config::{ }; use once_cell::sync::OnceCell; use rand::prelude::SliceRandom; -use rand::SeedableRng; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::underlay::BOOTSTRAP_PREFIX; use sled_hardware::Baseboard; use sled_hardware::SledMode; -use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET}; +use sled_storage::dataset::{ + DatasetKind, DatasetName, CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET, +}; use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; @@ -108,9 +109,8 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use tokio::io::AsyncWriteExt; -use tokio::sync::oneshot; use tokio::sync::Mutex; -use tokio::sync::MutexGuard; +use tokio::sync::{oneshot, MutexGuard}; use tokio::task::JoinHandle; use uuid::Uuid; @@ -198,9 +198,6 @@ pub enum Error { #[error("Could not initialize service {service} as requested: {message}")] BadServiceRequest { service: String, message: String }, - #[error("Services already configured for this Sled Agent")] - ServicesAlreadyConfigured, - #[error("Failed to get address: {0}")] GetAddressFailure(#[from] illumos_utils::zone::GetAddressError), @@ -224,6 +221,17 @@ pub enum Error { #[error("Error querying simnet devices")] Simnet(#[from] GetSimnetError), + + #[error( + "Requested generation ({requested}) is older than current ({current})" + )] + RequestedConfigOutdated { requested: Generation, current: Generation }, + + #[error("Requested generation {0} with different zones than before")] + RequestedConfigConflicts(Generation), + + #[error("Error migrating old-format services ledger: {0:#}")] + ServicesMigration(anyhow::Error), } impl Error { @@ -237,8 +245,18 @@ impl Error { impl From for omicron_common::api::external::Error { fn from(err: Error) -> Self { - omicron_common::api::external::Error::InternalError { - internal_message: err.to_string(), + match err { + err @ Error::RequestedConfigConflicts(_) => { + omicron_common::api::external::Error::invalid_request( + &err.to_string(), + ) + } + err @ Error::RequestedConfigOutdated { .. } => { + omicron_common::api::external::Error::conflict(&err.to_string()) + } + _ => omicron_common::api::external::Error::InternalError { + internal_message: err.to_string(), + }, } } } @@ -274,42 +292,176 @@ impl Config { } // The filename of the ledger, within the provided directory. -const SERVICES_LEDGER_FILENAME: &str = "services.json"; - -// A wrapper around `ZoneRequest`, which allows it to be serialized -// to a JSON file. 
-#[derive(Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -struct AllZoneRequests { - generation: Generation, - requests: Vec, +const ZONES_LEDGER_FILENAME: &str = "omicron-zones.json"; + +/// Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus +/// wants for all of its zones) with the locally-determined configuration for +/// these zones. +#[derive( + Clone, Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema, +)] +pub struct OmicronZonesConfigLocal { + /// generation of the Omicron-provided part of the configuration + /// + /// This generation number is outside of Sled Agent's control. We store + /// exactly what we were given and use this number to decide when to + /// fail requests to establish an outdated configuration. + /// + /// You can think of this as a major version number, with + /// `ledger_generation` being a minor version number. See + /// `is_newer_than()`. + pub omicron_generation: Generation, + + /// ledger-managed generation number + /// + /// This generation is managed by the ledger facility itself. It's bumped + /// whenever we write a new ledger. In practice, we don't currently have + /// any reason to bump this _for a given Omicron generation_ so it's + /// somewhat redundant. In principle, if we needed to modify the ledgered + /// configuration due to some event that doesn't change the Omicron config + /// (e.g., if we wanted to move the root filesystem to a different path), we + /// could do that by bumping this generation. + pub ledger_generation: Generation, + pub zones: Vec, } -impl Default for AllZoneRequests { - fn default() -> Self { - Self { generation: Generation::new(), requests: vec![] } +impl Ledgerable for OmicronZonesConfigLocal { + fn is_newer_than(&self, other: &OmicronZonesConfigLocal) -> bool { + self.omicron_generation > other.omicron_generation + || (self.omicron_generation == other.omicron_generation + && self.ledger_generation >= other.ledger_generation) + } + + fn generation_bump(&mut self) { + self.ledger_generation = self.ledger_generation.next(); } } -impl Ledgerable for AllZoneRequests { - fn is_newer_than(&self, other: &AllZoneRequests) -> bool { - self.generation >= other.generation +impl OmicronZonesConfigLocal { + /// Returns the initial configuration for generation 1, which has no zones + pub fn initial() -> OmicronZonesConfigLocal { + OmicronZonesConfigLocal { + omicron_generation: Generation::new(), + ledger_generation: Generation::new(), + zones: vec![], + } } - fn generation_bump(&mut self) { - self.generation = self.generation.next(); + pub fn to_omicron_zones_config(self) -> OmicronZonesConfig { + OmicronZonesConfig { + generation: self.omicron_generation, + zones: self.zones.into_iter().map(|z| z.zone).collect(), + } } } -// This struct represents the combo of "what zone did you ask for" + "where did -// we put it". 
-#[derive(Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -struct ZoneRequest { - zone: ServiceZoneRequest, - // TODO: Consider collapsing "root" into ServiceZoneRequest +/// Combines the Nexus-provided `OmicronZoneConfig` (which describes what Nexus +/// wants for this zone) with any locally-determined configuration (like the +/// path to the root filesystem) +#[derive( + Clone, Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema, +)] +pub struct OmicronZoneConfigLocal { + pub zone: OmicronZoneConfig, #[schemars(with = "String")] + pub root: Utf8PathBuf, +} + +/// Describes how we want a switch zone to be configured +/// +/// This is analogous to `OmicronZoneConfig`, but for the switch zone (which is +/// operated autonomously by the Sled Agent, not managed by Omicron). +#[derive(Clone)] +struct SwitchZoneConfig { + id: Uuid, + addresses: Vec, + services: Vec, +} + +/// Describes one of several services that may be deployed in a switch zone +/// +/// Some of these are only present in certain configurations (e.g., with a real +/// Tofino vs. SoftNPU) or are configured differently depending on the +/// configuration. +#[derive(Clone)] +enum SwitchService { + ManagementGatewayService, + Wicketd { baseboard: Baseboard }, + Dendrite { asic: DendriteAsic }, + Tfport { pkt_source: String, asic: DendriteAsic }, + Uplink, + MgDdm { mode: String }, + Mgd, + SpSim, +} + +impl crate::smf_helper::Service for SwitchService { + fn service_name(&self) -> String { + match self { + SwitchService::ManagementGatewayService => "mgs", + SwitchService::Wicketd { .. } => "wicketd", + SwitchService::Dendrite { .. } => "dendrite", + SwitchService::Tfport { .. } => "tfport", + SwitchService::Uplink { .. } => "uplink", + SwitchService::MgDdm { .. } => "mg-ddm", + SwitchService::Mgd => "mgd", + SwitchService::SpSim => "sp-sim", + } + .to_owned() + } + fn smf_name(&self) -> String { + format!("svc:/oxide/{}", self.service_name()) + } + fn should_import(&self) -> bool { + true + } +} + +/// Combines the generic `SwitchZoneConfig` with other locally-determined +/// configuration +/// +/// This is analogous to `OmicronZoneConfigLocal`, but for the switch zone. +struct SwitchZoneConfigLocal { + zone: SwitchZoneConfig, root: Utf8PathBuf, } +/// Describes either an Omicron-managed zone or the switch zone, used for +/// functions that operate on either one or the other +enum ZoneArgs<'a> { + Omicron(&'a OmicronZoneConfigLocal), + Switch(&'a SwitchZoneConfigLocal), +} + +impl<'a> ZoneArgs<'a> { + /// If this is an Omicron zone, return its type + pub fn omicron_type(&self) -> Option<&'a OmicronZoneType> { + match self { + ZoneArgs::Omicron(zone_config) => Some(&zone_config.zone.zone_type), + ZoneArgs::Switch(_) => None, + } + } + + /// If this is a sled-local (switch) zone, iterate over the services it's + /// supposed to be running + pub fn sled_local_services( + &self, + ) -> Box + 'a> { + match self { + ZoneArgs::Omicron(_) => Box::new(std::iter::empty()), + ZoneArgs::Switch(request) => Box::new(request.zone.services.iter()), + } + } + + /// Return the root filesystem path for this zone + pub fn root(&self) -> &Utf8Path { + match self { + ZoneArgs::Omicron(zone_config) => &zone_config.root, + ZoneArgs::Switch(zone_request) => &zone_request.root, + } + } +} + struct Task { // A signal for the initializer task to terminate exit_tx: oneshot::Sender<()>, @@ -335,7 +487,7 @@ enum SledLocalZone { // of certain links. 
    Initializing {
        // The request for the zone
-        request: ServiceZoneRequest,
+        request: SwitchZoneConfig,
        // A background task which keeps looping until the zone is initialized
        worker: Option<Task>,
        // Filesystems for the switch zone to mount
@@ -348,7 +500,7 @@ enum SledLocalZone {
    // The Zone is currently running.
    Running {
        // The original request for the zone
-        request: ServiceZoneRequest,
+        request: SwitchZoneConfig,
        // The currently running zone
        zone: RunningZone,
    },
@@ -485,6 +637,173 @@ impl ServiceManager {
            .collect()
    }
 
+    async fn all_omicron_zone_ledgers(&self) -> Vec<Utf8PathBuf> {
+        if let Some(dir) = self.inner.ledger_directory_override.get() {
+            return vec![dir.join(ZONES_LEDGER_FILENAME)];
+        }
+        let resources = self.inner.storage.get_latest_resources().await;
+        resources
+            .all_m2_mountpoints(CONFIG_DATASET)
+            .into_iter()
+            .map(|p| p.join(ZONES_LEDGER_FILENAME))
+            .collect()
+    }
+
+    // Loads persistent configuration about any Omicron-managed zones that we're
+    // supposed to be running.
+    //
+    // For historical reasons, there are two possible places this configuration
+    // could live, each with its own format. This function first checks the
+    // newer one. If no configuration was found there, it checks the older
+    // one. If only the older one was found, it is converted into the new form
+    // so that future calls will only look at the new form.
+    async fn load_ledgered_zones(
+        &self,
+        // This argument attempts to ensure that the caller holds the right
+        // lock.
+        _map: &MutexGuard<'_, BTreeMap<String, RunningZone>>,
+    ) -> Result<Option<Ledger<OmicronZonesConfigLocal>>, Error> {
+        // First, try to load the current software's zone ledger. If that
+        // works, we're done.
+        let log = &self.inner.log;
+        let ledger_paths = self.all_omicron_zone_ledgers().await;
+        info!(log, "Loading Omicron zones from: {ledger_paths:?}");
+        let maybe_ledger =
+            Ledger::<OmicronZonesConfigLocal>::new(log, ledger_paths.clone())
+                .await;
+
+        if let Some(ledger) = maybe_ledger {
+            info!(
+                log,
+                "Loaded Omicron zones";
+                "zones_config" => ?ledger.data()
+            );
+            return Ok(Some(ledger));
+        }
+
+        // Now look for the ledger used by previous versions. If we find it,
+        // we'll convert it and write out a new ledger used by the current
+        // software.
+        info!(
+            log,
+            "Loading Omicron zones - No zones detected \
+            (will look for old-format services)"
+        );
+        let services_ledger_paths = self.all_service_ledgers().await;
+        info!(
+            log,
+            "Loading old-format services from: {services_ledger_paths:?}"
+        );
+
+        let maybe_ledger =
+            Ledger::<AllZoneRequests>::new(log, services_ledger_paths.clone())
+                .await;
+        let maybe_converted = match maybe_ledger {
+            None => {
+                // The ledger ignores all errors attempting to load files. That
+                // might be fine most of the time. In this case, we want to
+                // raise a big red flag if we find an old-format ledger that we
+                // can't process.
+                if services_ledger_paths.iter().any(|p| p.exists()) {
+                    Err(Error::ServicesMigration(anyhow!(
+                        "failed to read or parse old-format ledger, \
+                        but one exists"
+                    )))
+                } else {
+                    // There was no old-format ledger at all.
+                    return Ok(None);
+                }
+            }
+            Some(ledger) => {
+                let all_services = ledger.into_inner();
+                OmicronZonesConfigLocal::try_from(all_services)
+                    .map_err(Error::ServicesMigration)
+            }
+        };
+
+        match maybe_converted {
+            Err(error) => {
+                // We've tried to test thoroughly so that this should never
+                // happen. If for some reason it does happen, engineering
+                // intervention is likely to be required to figure out how to
+                // proceed.
The current software does not directly support + // whatever was in the ledger, and it's not safe to just come up + // with no zones when we're supposed to be running stuff. We'll + // need to figure out what's unexpected about what we found in + // the ledger and figure out how to fix the + // conversion. + error!( + log, + "Loading Omicron zones - found services but failed \ + to convert them (support intervention required): \ + {:#}", + error + ); + return Err(error); + } + Ok(new_config) => { + // We've successfully converted the old ledger. Write a new + // one. + info!( + log, + "Successfully migrated old-format services ledger to \ + zones ledger" + ); + let mut ledger = Ledger::::new_with( + log, + ledger_paths.clone(), + new_config, + ); + + ledger.commit().await?; + + // We could consider removing the old ledger here. That would + // not guarantee that it would be gone, though, because we could + // crash during `ledger.commit()` above having written at least + // one of the new ledgers. In that case, we won't go through + // this code path again on restart. If we wanted to ensure the + // old-format ledger was gone after the migration, we could + // consider unconditionally removing the old ledger paths in the + // caller, after we've got a copy of the new-format ledger. + // + // Should we? In principle, it shouldn't matter either way + // because we will never look at the old-format ledger unless we + // don't have a new-format one, and we should now have a + // new-format one forever now. + // + // When might it matter? Two cases: + // + // (1) If the sled agent is downgraded to a previous version + // that doesn't know about the new-format ledger. Do we + // want that sled agent to use the old-format one? It + // depends. If that downgrade happens immediately because + // the upgrade to the first new-format version was a + // disaster, then we'd probably rather the downgraded sled + // agent _did_ start its zones. If the downgrade happens + // months later, potentially after various additional + // reconfigurations, then that old-format ledger is probably + // out of date and shouldn't be used. There's no way to + // really know which case we're in, but the latter seems + // quite unlikely (why would we downgrade so far back after + // so long?). So that's a reason to keep the old-format + // ledger. + // + // (2) Suppose a developer or Oxide support engineer removes the + // new ledger for some reason, maybe thinking sled agent + // would come up with no zones running. They'll be + // surprised to discover that it actually starts running a + // potentially old set of zones. This probably only matters + // on a production system, and even then, it probably + // shouldn't happen. + // + // Given these cases, we're left ambivalent. We choose to keep + // the old ledger around. If nothing else, if something goes + // wrong, we'll have a copy of its last contents! + Ok(Some(ledger)) + } + } + } + // TODO(https://github.com/oxidecomputer/omicron/issues/2973): // // The sled agent retries this function indefinitely at the call-site, but @@ -495,65 +814,67 @@ impl ServiceManager { // more clearly. pub async fn load_services(&self) -> Result<(), Error> { let log = &self.inner.log; - let ledger_paths = self.all_service_ledgers().await; - info!(log, "Loading services from: {ledger_paths:?}"); - let mut existing_zones = self.inner.zones.lock().await; let Some(mut ledger) = - Ledger::::new(log, ledger_paths).await + self.load_ledgered_zones(&existing_zones).await? 
else { - info!(log, "Loading services - No services detected"); + // Nothing found -- nothing to do. + info!( + log, + "Loading Omicron zones - \ + no zones nor old-format services found" + ); return Ok(()); }; - let services = ledger.data_mut(); + + let zones_config = ledger.data_mut(); + info!( + log, + "Loaded Omicron zones"; + "zones_config" => ?zones_config + ); + let omicron_zones_config = + zones_config.clone().to_omicron_zones_config(); // Initialize internal DNS only first: we need it to look up the // boundary switch addresses. This dependency is implicit: when we call - // `ensure_all_services` below, we eventually land in + // `ensure_all_omicron_zones` below, we eventually land in // `opte_ports_needed()`, which for some service types (including Ntp // but _not_ including InternalDns), we perform internal DNS lookups. let all_zones_request = self - .ensure_all_services( + .ensure_all_omicron_zones( &mut existing_zones, - &AllZoneRequests::default(), - ServiceEnsureBody { - services: services - .requests - .clone() - .into_iter() - .filter(|svc| { - matches!( - svc.zone.zone_type, - ZoneType::InternalDns | ZoneType::Ntp - ) - }) - .map(|zone_request| zone_request.zone) - .collect(), + None, + omicron_zones_config.clone(), + |z: &OmicronZoneConfig| { + matches!( + z.zone_type, + OmicronZoneType::InternalDns { .. } + | OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } + ) }, ) .await?; // Initialize NTP services next as they are required for time // synchronization, which is a pre-requisite for the other services. We - // keep `ZoneType::InternalDns` because `ensure_all_services` is - // additive. + // keep `OmicronZoneType::InternalDns` because + // `ensure_all_omicron_zones` is additive. + // TODO This looks like a duplicate of the block above -- why do we do + // this? let all_zones_request = self - .ensure_all_services( + .ensure_all_omicron_zones( &mut existing_zones, - &all_zones_request, - ServiceEnsureBody { - services: services - .requests - .clone() - .into_iter() - .filter(|svc| { - matches!( - svc.zone.zone_type, - ZoneType::InternalDns | ZoneType::Ntp - ) - }) - .map(|zone_request| zone_request.zone) - .collect(), + Some(&all_zones_request), + omicron_zones_config.clone(), + |z: &OmicronZoneConfig| { + matches!( + z.zone_type, + OmicronZoneType::InternalDns { .. } + | OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } + ) }, ) .await?; @@ -595,17 +916,11 @@ impl ServiceManager { let mut existing_zones = self.inner.zones.lock().await; // Initialize all remaining services - self.ensure_all_services( + self.ensure_all_omicron_zones( &mut existing_zones, - &all_zones_request, - ServiceEnsureBody { - services: services - .requests - .clone() - .into_iter() - .map(|zone_request| zone_request.zone) - .collect(), - }, + Some(&all_zones_request), + omicron_zones_config, + |_| true, ) .await?; Ok(()) @@ -661,11 +976,11 @@ impl ServiceManager { // Check the services intended to run in the zone to determine whether any // physical devices need to be mapped into the zone when it is created. 
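    // (Only switch-zone services request devices today -- e.g. Dendrite on a
    // Tofino ASIC; Omicron zones contribute none. See the `SwitchService`
    // match arms below.)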
-    fn devices_needed(req: &ServiceZoneRequest) -> Result<Vec<String>, Error> {
+    fn devices_needed(zone_args: &ZoneArgs<'_>) -> Result<Vec<String>, Error> {
         let mut devices = vec![];
-        for svc in &req.services {
-            match &svc.details {
-                ServiceType::Dendrite { asic: DendriteAsic::TofinoAsic } => {
+        for svc_details in zone_args.sled_local_services() {
+            match svc_details {
+                SwitchService::Dendrite { asic: DendriteAsic::TofinoAsic } => {
                     if let Ok(Some(n)) = tofino::get_tofino() {
                         if let Ok(device_path) = n.device_path() {
                             devices.push(device_path);
@@ -676,7 +991,7 @@ impl ServiceManager {
                             device: "tofino".to_string(),
                         });
                     }
-                ServiceType::Dendrite {
+                SwitchService::Dendrite {
                     asic: DendriteAsic::SoftNpuPropolisDevice,
                 } => {
                     devices.push("/dev/tty03".into());
@@ -700,18 +1015,17 @@ impl ServiceManager {
     // bootstrap address.
     fn bootstrap_address_needed(
         &self,
-        req: &ServiceZoneRequest,
+        zone_args: &ZoneArgs<'_>,
     ) -> Result<Option<(Link, Ipv6Addr)>, Error> {
-        match req.zone_type {
-            ZoneType::Switch => {
-                let link = self
-                    .inner
-                    .bootstrap_vnic_allocator
-                    .new_bootstrap()
-                    .map_err(Error::SledLocalVnicCreation)?;
-                Ok(Some((link, self.inner.switch_zone_bootstrap_address)))
-            }
-            _ => Ok(None),
+        if let ZoneArgs::Switch(_) = zone_args {
+            let link = self
+                .inner
+                .bootstrap_vnic_allocator
+                .new_bootstrap()
+                .map_err(Error::SledLocalVnicCreation)?;
+            Ok(Some((link, self.inner.switch_zone_bootstrap_address)))
+        } else {
+            Ok(None)
         }
     }
 
@@ -736,7 +1050,7 @@ impl ServiceManager {
     // local addresses in the zone.
     fn links_needed(
         &self,
-        req: &ServiceZoneRequest,
+        zone_args: &ZoneArgs<'_>,
     ) -> Result<Vec<(Link, bool)>, Error> {
         let mut links: Vec<(Link, bool)> = Vec::new();
 
         let is_gimlet = is_gimlet().map_err(|e| {
             Error::Underlay(underlay::Error::SystemDetection(e))
         })?;
 
-        for svc in &req.services {
-            match &svc.details {
-                ServiceType::Tfport { pkt_source, asic: _ } => {
-                    // The tfport service requires a MAC device to/from which sidecar
-                    // packets may be multiplexed. If the link isn't present, don't
-                    // bother trying to start the zone.
+        for svc_details in zone_args.sled_local_services() {
+            match &svc_details {
+                SwitchService::Tfport { pkt_source, asic: _ } => {
+                    // The tfport service requires a MAC device to/from which
+                    // sidecar packets may be multiplexed. If the link isn't
+                    // present, don't bother trying to start the zone.
                     match Dladm::verify_link(pkt_source) {
                         Ok(link) => {
                             // It's important that tfpkt does **not** receive a
@@ -765,7 +1079,7 @@ impl ServiceManager {
                         }
                     }
                 }
-                ServiceType::MgDdm { .. } => {
+                SwitchService::MgDdm { .. } => {
                     // If on a non-gimlet, sled-agent can be configured to map
                     // links into the switch zone. Validate those links here.
                     for link in &self.inner.switch_zone_maghemite_links {
@@ -796,15 +1110,18 @@ impl ServiceManager {
     }
 
     // Check the services intended to run in the zone to determine whether any
-    // OPTE ports need to be created and mapped into the zone when it is created.
+    // OPTE ports need to be created and mapped into the zone when it is
+    // created.
     async fn opte_ports_needed(
         &self,
-        req: &ServiceZoneRequest,
+        zone_args: &ZoneArgs<'_>,
     ) -> Result<Vec<(Port, PortTicket)>, Error> {
         // Only some services currently need OPTE ports
         if !matches!(
-            req.zone_type,
-            ZoneType::ExternalDns | ZoneType::Nexus | ZoneType::Ntp
+            zone_args.omicron_type(),
+            Some(OmicronZoneType::ExternalDns { .. })
+                | Some(OmicronZoneType::Nexus { .. })
+                | Some(OmicronZoneType::BoundaryNtp { ..
}) ) { return Ok(vec![]); } @@ -851,100 +1168,120 @@ impl ServiceManager { }) .collect(); - let mut ports = vec![]; - for svc in &req.services { - let external_ip; - let (nic, snat, external_ips) = match &svc.details { - ServiceType::Nexus { external_ip, nic, .. } => { - (nic, None, std::slice::from_ref(external_ip)) - } - ServiceType::ExternalDns { dns_address, nic, .. } => { - external_ip = dns_address.ip(); - (nic, None, std::slice::from_ref(&external_ip)) - } - ServiceType::BoundaryNtp { nic, snat_cfg, .. } => { - (nic, Some(*snat_cfg), &[][..]) - } - _ => continue, - }; - - // Create the OPTE port for the service. - // Note we don't plumb any firewall rules at this point, - // Nexus will plumb them down later but the default OPTE - // config allows outbound access which is enough for - // Boundary NTP which needs to come up before Nexus. - let port = port_manager - .create_port(nic, snat, external_ips, &[], DhcpCfg::default()) - .map_err(|err| Error::ServicePortCreation { - service: svc.details.to_string(), - err: Box::new(err), - })?; + let external_ip; + let (zone_type_str, nic, snat, external_ips) = match &zone_args + .omicron_type() + { + Some( + zone_type @ OmicronZoneType::Nexus { external_ip, nic, .. }, + ) => ( + zone_type.zone_type_str(), + nic, + None, + std::slice::from_ref(external_ip), + ), + Some( + zone_type @ OmicronZoneType::ExternalDns { + dns_address, + nic, + .. + }, + ) => { + external_ip = dns_address.ip(); + ( + zone_type.zone_type_str(), + nic, + None, + std::slice::from_ref(&external_ip), + ) + } + Some( + zone_type @ OmicronZoneType::BoundaryNtp { + nic, snat_cfg, .. + }, + ) => (zone_type.zone_type_str(), nic, Some(*snat_cfg), &[][..]), + _ => unreachable!("unexpected zone type"), + }; - // We also need to update the switch with the NAT mappings - let (target_ip, first_port, last_port) = match snat { - Some(s) => (s.ip, s.first_port, s.last_port), - None => (external_ips[0], 0, u16::MAX), - }; + // Create the OPTE port for the service. + // Note we don't plumb any firewall rules at this point, + // Nexus will plumb them down later but the default OPTE + // config allows outbound access which is enough for + // Boundary NTP which needs to come up before Nexus. + let port = port_manager + .create_port(nic, snat, external_ips, &[], DhcpCfg::default()) + .map_err(|err| Error::ServicePortCreation { + service: zone_type_str.clone(), + err: Box::new(err), + })?; - for dpd_client in &dpd_clients { - // TODO-correctness(#2933): If we fail part-way we need to - // clean up previous entries instead of leaking them. - let nat_create = || async { - info!( - self.inner.log, "creating NAT entry for service"; - "service" => ?svc, - ); + // We also need to update the switch with the NAT mappings + let (target_ip, first_port, last_port) = match snat { + Some(s) => (s.ip, s.first_port, s.last_port), + None => (external_ips[0], 0, u16::MAX), + }; - dpd_client - .ensure_nat_entry( - &self.inner.log, - target_ip.into(), - dpd_client::types::MacAddr { - a: port.0.mac().into_array(), - }, - first_port, - last_port, - port.0.vni().as_u32(), - underlay_address, - ) - .await - .map_err(BackoffError::transient)?; + for dpd_client in &dpd_clients { + // TODO-correctness(#2933): If we fail part-way we need to + // clean up previous entries instead of leaking them. 
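// The NAT target selection above, as a standalone sketch: boundary NTP
// carries a SNAT config with an explicit port range, while Nexus and
// external DNS claim the full port space of a dedicated external IP.
// `SnatCfg` is a simplified stand-in; like the original, this assumes at
// least one external IP whenever no SNAT config is present.
use std::net::IpAddr;

struct SnatCfg {
    ip: IpAddr,
    first_port: u16,
    last_port: u16,
}

fn nat_range(
    snat: Option<&SnatCfg>,
    external_ips: &[IpAddr],
) -> (IpAddr, u16, u16) {
    match snat {
        Some(s) => (s.ip, s.first_port, s.last_port),
        None => (external_ips[0], 0, u16::MAX),
    }
}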
+ let nat_create = || async { + info!( + self.inner.log, "creating NAT entry for service"; + "zone_type" => &zone_type_str, + ); - Ok::<(), BackoffError>>(()) - }; - let log_failure = |error, _| { - warn!( - self.inner.log, "failed to create NAT entry for service"; - "error" => ?error, - "service" => ?svc, - ); - }; - retry_notify( - retry_policy_internal_service_aggressive(), - nat_create, - log_failure, - ) - .await?; - } + dpd_client + .ensure_nat_entry( + &self.inner.log, + target_ip.into(), + dpd_client::types::MacAddr { + a: port.0.mac().into_array(), + }, + first_port, + last_port, + port.0.vni().as_u32(), + underlay_address, + ) + .await + .map_err(BackoffError::transient)?; - ports.push(port); + Ok::<(), BackoffError>>(()) + }; + let log_failure = |error, _| { + warn!( + self.inner.log, "failed to create NAT entry for service"; + "error" => ?error, + "zone_type" => &zone_type_str, + ); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + nat_create, + log_failure, + ) + .await?; } - - Ok(ports) + Ok(vec![port]) } // Check the services intended to run in the zone to determine whether any // additional privileges need to be enabled for the zone. - fn privs_needed(req: &ServiceZoneRequest) -> Vec { + fn privs_needed(zone_args: &ZoneArgs<'_>) -> Vec { let mut needed = Vec::new(); - for svc in &req.services { - match &svc.details { - ServiceType::Tfport { .. } => { + for svc_details in zone_args.sled_local_services() { + match svc_details { + SwitchService::Tfport { .. } => { needed.push("default".to_string()); needed.push("sys_dl_config".to_string()); } - ServiceType::BoundaryNtp { .. } - | ServiceType::InternalNtp { .. } => { + _ => (), + } + } + + if let Some(omicron_zone_type) = zone_args.omicron_type() { + match omicron_zone_type { + OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } => { needed.push("default".to_string()); needed.push("sys_time".to_string()); needed.push("proc_priocntl".to_string()); @@ -1048,13 +1385,13 @@ impl ServiceManager { async fn initialize_zone( &self, - request: &ZoneRequest, + request: ZoneArgs<'_>, filesystems: &[zone::Fs], data_links: &[String], ) -> Result { - let device_names = Self::devices_needed(&request.zone)?; + let device_names = Self::devices_needed(&request)?; let (bootstrap_vnic, bootstrap_name_and_address) = - match self.bootstrap_address_needed(&request.zone)? { + match self.bootstrap_address_needed(&request)? { Some((vnic, address)) => { let name = vnic.name().to_string(); (Some(vnic), Some((name, address))) @@ -1067,20 +1404,26 @@ impl ServiceManager { let links: Vec; let links_need_link_local: Vec; (links, links_need_link_local) = - self.links_needed(&request.zone)?.into_iter().unzip(); - let opte_ports = self.opte_ports_needed(&request.zone).await?; - let limit_priv = Self::privs_needed(&request.zone); + self.links_needed(&request)?.into_iter().unzip(); + let opte_ports = self.opte_ports_needed(&request).await?; + let limit_priv = Self::privs_needed(&request); // If the zone is managing a particular dataset, plumb that // dataset into the zone. Additionally, construct a "unique enough" name // so we can create multiple zones of this type without collision. 
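// The `retry_notify` call above comes from omicron's backoff helpers; a
// rough stand-in for its shape, using a fixed delay instead of the real
// aggressive policy and treating every error as transient:
use std::time::Duration;

async fn retry_forever<F, Fut, T, E: std::fmt::Debug>(mut op: F) -> T
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    loop {
        match op().await {
            Ok(v) => return v,
            // The real helper distinguishes transient from permanent
            // errors and invokes a notify callback; here we just log.
            Err(e) => eprintln!("transient failure, retrying: {e:?}"),
        }
        tokio::time::sleep(Duration::from_secs(1)).await;
    }
}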
- let unique_name = request.zone.zone_name_unique_identifier(); - let datasets = request - .zone - .dataset - .iter() - .map(|d| zone::Dataset { name: d.name.full() }) - .collect::>(); + let unique_name = match &request { + ZoneArgs::Omicron(zone_config) => Some(zone_config.zone.id), + ZoneArgs::Switch(_) => None, + }; + let datasets: Vec<_> = match &request { + ZoneArgs::Omicron(zone_config) => zone_config + .zone + .dataset_name() + .map(|n| zone::Dataset { name: n.full() }) + .into_iter() + .collect(), + ZoneArgs::Switch(_) => vec![], + }; let devices: Vec = device_names .iter() @@ -1103,6 +1446,13 @@ impl ServiceManager { .push(boot_zpool.dataset_mountpoint(INSTALL_DATASET)); } + let zone_type_str = match &request { + ZoneArgs::Omicron(zone_config) => { + zone_config.zone.zone_type.zone_type_str() + } + ZoneArgs::Switch(_) => "switch".to_string(), + }; + let mut zone_builder = ZoneBuilderFactory::default().builder(); if let Some(uuid) = unique_name { zone_builder = zone_builder.with_unique_name(uuid); @@ -1113,9 +1463,9 @@ impl ServiceManager { let installed_zone = zone_builder .with_log(self.inner.log.clone()) .with_underlay_vnic_allocator(&self.inner.underlay_vnic_allocator) - .with_zone_root_path(&request.root) + .with_zone_root_path(&request.root()) .with_zone_image_paths(zone_image_paths.as_slice()) - .with_zone_type(&request.zone.zone_type.to_string()) + .with_zone_type(&zone_type_str) .with_datasets(datasets.as_slice()) .with_filesystems(&filesystems) .with_data_links(&data_links) @@ -1130,8 +1480,16 @@ impl ServiceManager { // // These zones are self-assembling -- after they boot, there should // be no "zlogin" necessary to initialize. - match request.zone.zone_type { - ZoneType::Clickhouse => { + match &request { + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + zone_type: OmicronZoneType::Clickhouse { .. }, + underlay_address, + .. + }, + .. + }) => { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; @@ -1140,8 +1498,7 @@ impl ServiceManager { let datalink = installed_zone.get_control_vnic_name(); let gateway = &info.underlay_address.to_string(); - assert_eq!(request.zone.addresses.len(), 1); - let listen_addr = &request.zone.addresses[0].to_string(); + let listen_addr = &underlay_address.to_string(); let listen_port = &CLICKHOUSE_PORT.to_string(); let config = PropertyGroupBuilder::new("config") @@ -1167,7 +1524,16 @@ impl ServiceManager { })?; return Ok(RunningZone::boot(installed_zone).await?); } - ZoneType::ClickhouseKeeper => { + + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + zone_type: OmicronZoneType::ClickhouseKeeper { .. }, + underlay_address, + .. + }, + .. + }) => { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; @@ -1176,8 +1542,7 @@ impl ServiceManager { let datalink = installed_zone.get_control_vnic_name(); let gateway = &info.underlay_address.to_string(); - assert_eq!(request.zone.addresses.len(), 1); - let listen_addr = &request.zone.addresses[0].to_string(); + let listen_addr = &underlay_address.to_string(); let listen_port = &CLICKHOUSE_KEEPER_PORT.to_string(); let config = PropertyGroupBuilder::new("config") @@ -1206,7 +1571,16 @@ impl ServiceManager { })?; return Ok(RunningZone::boot(installed_zone).await?); } - ZoneType::CockroachDb => { + + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + zone_type: OmicronZoneType::CockroachDb { .. }, + underlay_address, + .. + }, + .. 
+ }) => { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; @@ -1216,9 +1590,8 @@ impl ServiceManager { // Configure the CockroachDB service. let datalink = installed_zone.get_control_vnic_name(); let gateway = &info.underlay_address.to_string(); - assert_eq!(request.zone.addresses.len(), 1); let address = SocketAddr::new( - IpAddr::V6(request.zone.addresses[0]), + IpAddr::V6(*underlay_address), COCKROACH_PORT, ); let listen_addr = &address.ip().to_string(); @@ -1247,22 +1620,29 @@ impl ServiceManager { })?; return Ok(RunningZone::boot(installed_zone).await?); } - ZoneType::Crucible => { + + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + zone_type: OmicronZoneType::Crucible { dataset, .. }, + underlay_address, + .. + }, + .. + }) => { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; let datalink = installed_zone.get_control_vnic_name(); let gateway = &info.underlay_address.to_string(); - assert_eq!(request.zone.addresses.len(), 1); - let listen_addr = &request.zone.addresses[0].to_string(); + let listen_addr = &underlay_address.to_string(); let listen_port = &CRUCIBLE_PORT.to_string(); - let dataset_name = request - .zone - .dataset - .as_ref() - .map(|d| d.name.full()) - .expect("Crucible requires dataset"); + let dataset_name = DatasetName::new( + dataset.pool_name.clone(), + DatasetKind::Crucible, + ) + .full(); let uuid = &Uuid::new_v4().to_string(); let config = PropertyGroupBuilder::new("config") .add_property("datalink", "astring", datalink) @@ -1287,15 +1667,23 @@ impl ServiceManager { })?; return Ok(RunningZone::boot(installed_zone).await?); } - ZoneType::CruciblePantry => { + + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + zone_type: OmicronZoneType::CruciblePantry { .. }, + underlay_address, + .. + }, + .. + }) => { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; let datalink = installed_zone.get_control_vnic_name(); let gateway = &info.underlay_address.to_string(); - assert_eq!(request.zone.addresses.len(), 1); - let listen_addr = &request.zone.addresses[0].to_string(); + let listen_addr = &underlay_address.to_string(); let listen_port = &CRUCIBLE_PANTRY_PORT.to_string(); let config = PropertyGroupBuilder::new("config") @@ -1317,6 +1705,7 @@ impl ServiceManager { let running_zone = RunningZone::boot(installed_zone).await?; return Ok(running_zone); } + _ => {} } @@ -1347,7 +1736,7 @@ impl ServiceManager { self.inner.log, "Ensuring bootstrap address {} exists in {} zone", bootstrap_address.to_string(), - request.zone.zone_type.to_string() + &zone_type_str, ); running_zone.ensure_bootstrap_address(*bootstrap_address).await?; info!( @@ -1368,7 +1757,14 @@ impl ServiceManager { })?; } - for addr in &request.zone.addresses { + let addresses = match &request { + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: OmicronZoneConfig { underlay_address, .. }, + .. 
+ }) => std::slice::from_ref(underlay_address), + ZoneArgs::Switch(req) => &req.zone.addresses, + }; + for addr in addresses { if *addr == Ipv6Addr::LOCALHOST { continue; } @@ -1393,9 +1789,7 @@ impl ServiceManager { let sled_underlay_subnet = Ipv6Subnet::::new(info.underlay_address); - if request - .zone - .addresses + if addresses .iter() .any(|ip| sled_underlay_subnet.net().contains(*ip)) { @@ -1427,661 +1821,744 @@ impl ServiceManager { })?; } - for service in &request.zone.services { - // TODO: Related to - // https://github.com/oxidecomputer/omicron/pull/1124 , should we - // avoid importing this manifest? - debug!(self.inner.log, "importing manifest"); - - let smfh = SmfHelper::new(&running_zone, &service.details); - smfh.import_manifest()?; - - match &service.details { - ServiceType::Nexus { - internal_address, - external_tls, - external_dns_servers, - .. - } => { - info!(self.inner.log, "Setting up Nexus service"); - - let sled_info = self - .inner - .sled_info - .get() - .ok_or(Error::SledAgentNotReady)?; - - // While Nexus will be reachable via `external_ip`, it communicates - // atop an OPTE port which operates on a VPC private IP. OPTE will - // map the private IP to the external IP automatically. - let port_ip = running_zone - .ensure_address_for_port("public", 0) - .await? - .ip(); - - // Nexus takes a separate config file for parameters which - // cannot be known at packaging time. - let nexus_port = if *external_tls { 443 } else { 80 }; - let deployment_config = NexusDeploymentConfig { - id: request.zone.id, - rack_id: sled_info.rack_id, - techport_external_server_port: - NEXUS_TECHPORT_EXTERNAL_PORT, - - dropshot_external: ConfigDropshotWithTls { - tls: *external_tls, - dropshot: dropshot::ConfigDropshot { - bind_address: SocketAddr::new( - port_ip, nexus_port, - ), - // This has to be large enough to support: - // - bulk writes to disks - request_body_max_bytes: 8192 * 1024, + match &request { + ZoneArgs::Omicron(zone_config) => { + // TODO: Related to + // https://github.com/oxidecomputer/omicron/pull/1124 , should we + // avoid importing this manifest? + debug!(self.inner.log, "importing manifest"); + + let smfh = + SmfHelper::new(&running_zone, &zone_config.zone.zone_type); + smfh.import_manifest()?; + + match &zone_config.zone.zone_type { + OmicronZoneType::Nexus { + internal_address, + external_tls, + external_dns_servers, + .. + } => { + info!(self.inner.log, "Setting up Nexus service"); + + let sled_info = self + .inner + .sled_info + .get() + .ok_or(Error::SledAgentNotReady)?; + + // While Nexus will be reachable via `external_ip`, it + // communicates atop an OPTE port which operates on a + // VPC private IP. OPTE will map the private IP to the + // external IP automatically. + let port_ip = running_zone + .ensure_address_for_port("public", 0) + .await? + .ip(); + + // Nexus takes a separate config file for parameters + // which cannot be known at packaging time. 
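// The config-completion step described in the comment above (and
// implemented just below) boils down to serializing the runtime-only
// parameters under a `[deployment]` table and appending the result to the
// packaged partial config. A sketch with a throwaway payload struct:
use serde::Serialize;

#[derive(Serialize)]
struct DeploymentStub {
    // Stand-in field; any `Serialize` payload renders the same way.
    id: String,
}

fn render_deployment(cfg: &DeploymentStub) -> Result<String, toml::ser::Error> {
    let mut map = toml::map::Map::new();
    map.insert("deployment".to_string(), toml::Value::try_from(cfg)?);
    // The caller appends this (after a blank line) to config-partial.toml.
    toml::to_string(&map)
}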
+ let nexus_port = if *external_tls { 443 } else { 80 }; + let deployment_config = NexusDeploymentConfig { + id: zone_config.zone.id, + rack_id: sled_info.rack_id, + techport_external_server_port: + NEXUS_TECHPORT_EXTERNAL_PORT, + + dropshot_external: ConfigDropshotWithTls { + tls: *external_tls, + dropshot: dropshot::ConfigDropshot { + bind_address: SocketAddr::new( + port_ip, nexus_port, + ), + // This has to be large enough to support: + // - bulk writes to disks + request_body_max_bytes: 8192 * 1024, + default_handler_task_mode: + HandlerTaskMode::Detached, + }, + }, + dropshot_internal: dropshot::ConfigDropshot { + bind_address: (*internal_address).into(), + // This has to be large enough to support, among + // other things, the initial list of TLS + // certificates provided by the customer during + // rack setup. + request_body_max_bytes: 10 * 1024 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, }, - }, - dropshot_internal: dropshot::ConfigDropshot { - bind_address: (*internal_address).into(), - // This has to be large enough to support, among - // other things, the initial list of TLS - // certificates provided by the customer during rack - // setup. - request_body_max_bytes: 10 * 1024 * 1024, - default_handler_task_mode: - HandlerTaskMode::Detached, - }, - internal_dns: nexus_config::InternalDns::FromSubnet { - subnet: Ipv6Subnet::::new( - sled_info.underlay_address, + internal_dns: + nexus_config::InternalDns::FromSubnet { + subnet: Ipv6Subnet::::new( + sled_info.underlay_address, + ), + }, + database: nexus_config::Database::FromDns, + external_dns_servers: external_dns_servers.clone(), + }; + + // Copy the partial config file to the expected + // location. + let config_dir = Utf8PathBuf::from(format!( + "{}/var/svc/manifest/site/nexus", + running_zone.root() + )); + // The filename of a half-completed config, in need of + // parameters supplied at runtime. + const PARTIAL_LEDGER_FILENAME: &str = + "config-partial.toml"; + // The filename of a completed config, merging the + // partial config with additional appended parameters + // known at runtime. + const COMPLETE_LEDGER_FILENAME: &str = "config.toml"; + let partial_config_path = + config_dir.join(PARTIAL_LEDGER_FILENAME); + let config_path = + config_dir.join(COMPLETE_LEDGER_FILENAME); + tokio::fs::copy(partial_config_path, &config_path) + .await + .map_err(|err| Error::io_path(&config_path, err))?; + + // Serialize the configuration and append it into the + // file. + let serialized_cfg = + toml::Value::try_from(&deployment_config) + .expect("Cannot serialize config"); + let mut map = toml::map::Map::new(); + map.insert("deployment".to_string(), serialized_cfg); + let config_str = + toml::to_string(&map).map_err(|err| { + Error::TomlSerialize { + path: config_path.clone(), + err, + } + })?; + let mut file = tokio::fs::OpenOptions::new() + .append(true) + .open(&config_path) + .await + .map_err(|err| Error::io_path(&config_path, err))?; + file.write_all(b"\n\n") + .await + .map_err(|err| Error::io_path(&config_path, err))?; + file.write_all(config_str.as_bytes()) + .await + .map_err(|err| Error::io_path(&config_path, err))?; + } + + OmicronZoneType::ExternalDns { + http_address, + dns_address, + .. + } => { + info!( + self.inner.log, + "Setting up external-dns service" + ); + + // Like Nexus, we need to be reachable externally via + // `dns_address` but we don't listen on that address + // directly but instead on a VPC private IP. OPTE will + // en/decapsulate as appropriate. 
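// The external DNS listen-address rewrite described above keeps the
// requested port but substitutes the OPTE port's VPC-private IP:
use std::net::{IpAddr, SocketAddr};

fn rewrite_for_opte(requested: SocketAddr, port_ip: IpAddr) -> SocketAddr {
    SocketAddr::new(port_ip, requested.port())
}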
+ let port_ip = running_zone + .ensure_address_for_port("public", 0) + .await? + .ip(); + let dns_address = + SocketAddr::new(port_ip, dns_address.port()); + + smfh.setprop( + "config/http_address", + format!( + "[{}]:{}", + http_address.ip(), + http_address.port(), ), - }, - database: nexus_config::Database::FromDns, - external_dns_servers: external_dns_servers.clone(), - }; + )?; + smfh.setprop( + "config/dns_address", + dns_address.to_string(), + )?; - // Copy the partial config file to the expected location. - let config_dir = Utf8PathBuf::from(format!( - "{}/var/svc/manifest/site/nexus", - running_zone.root() - )); - // The filename of a half-completed config, in need of parameters supplied at - // runtime. - const PARTIAL_LEDGER_FILENAME: &str = "config-partial.toml"; - // The filename of a completed config, merging the partial config with - // additional appended parameters known at runtime. - const COMPLETE_LEDGER_FILENAME: &str = "config.toml"; - let partial_config_path = - config_dir.join(PARTIAL_LEDGER_FILENAME); - let config_path = config_dir.join(COMPLETE_LEDGER_FILENAME); - tokio::fs::copy(partial_config_path, &config_path) - .await - .map_err(|err| Error::io_path(&config_path, err))?; - - // Serialize the configuration and append it into the file. - let serialized_cfg = - toml::Value::try_from(&deployment_config) - .expect("Cannot serialize config"); - let mut map = toml::map::Map::new(); - map.insert("deployment".to_string(), serialized_cfg); - let config_str = toml::to_string(&map).map_err(|err| { - Error::TomlSerialize { path: config_path.clone(), err } - })?; - let mut file = tokio::fs::OpenOptions::new() - .append(true) - .open(&config_path) - .await - .map_err(|err| Error::io_path(&config_path, err))?; - file.write_all(b"\n\n") - .await - .map_err(|err| Error::io_path(&config_path, err))?; - file.write_all(config_str.as_bytes()) - .await - .map_err(|err| Error::io_path(&config_path, err))?; - } - ServiceType::ExternalDns { - http_address, dns_address, .. - } => { - info!(self.inner.log, "Setting up external-dns service"); - - // Like Nexus, we need to be reachable externally via - // `dns_address` but we don't listen on that address - // directly but instead on a VPC private IP. OPTE will - // en/decapsulate as appropriate. - let port_ip = running_zone - .ensure_address_for_port("public", 0) - .await? - .ip(); - let dns_address = - SocketAddr::new(port_ip, dns_address.port()); - - smfh.setprop( - "config/http_address", - format!( - "[{}]:{}", - http_address.ip(), - http_address.port(), - ), - )?; - smfh.setprop( - "config/dns_address", - dns_address.to_string(), - )?; + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the + // service is enabled. + smfh.refresh()?; + } - // Refresh the manifest with the new properties we set, so - // they become "effective" properties when the service is - // enabled. - smfh.refresh()?; - } - ServiceType::InternalDns { - http_address, - dns_address, - gz_address, - gz_address_index, - } => { - info!(self.inner.log, "Setting up internal-dns service"); - - // Internal DNS zones require a special route through the - // global zone, since they are not on the same part of the - // underlay as most other services on this sled (the sled's - // subnet). - // - // We create an IP address in the dedicated portion of the - // underlay used for internal DNS servers, but we *also* - // add a number ("which DNS server is this") to ensure - // these addresses are given unique names. 
In the unlikely - // case that two internal DNS servers end up on the same - // machine (which is effectively a developer-only - // environment -- we wouldn't want this in prod!), they need - // to be given distinct names. - let addr_name = format!("internaldns{gz_address_index}"); - Zones::ensure_has_global_zone_v6_address( - self.inner.underlay_vnic.clone(), - *gz_address, - &addr_name, - ) - .map_err(|err| Error::GzAddress { - message: format!( + OmicronZoneType::InternalDns { + http_address, + dns_address, + gz_address, + gz_address_index, + .. + } => { + info!( + self.inner.log, + "Setting up internal-dns service" + ); + + // Internal DNS zones require a special route through + // the global zone, since they are not on the same part + // of the underlay as most other services on this sled + // (the sled's subnet). + // + // We create an IP address in the dedicated portion of + // the underlay used for internal DNS servers, but we + // *also* add a number ("which DNS server is this") to + // ensure these addresses are given unique names. In the + // unlikely case that two internal DNS servers end up on + // the same machine (which is effectively a + // developer-only environment -- we wouldn't want this + // in prod!), they need to be given distinct names. + let addr_name = + format!("internaldns{gz_address_index}"); + Zones::ensure_has_global_zone_v6_address( + self.inner.underlay_vnic.clone(), + *gz_address, + &addr_name, + ) + .map_err(|err| { + Error::GzAddress { + message: format!( "Failed to create address {} for Internal DNS zone", addr_name ), - err, - })?; - // If this address is in a new ipv6 prefix, notify maghemite so - // it can advertise it to other sleds. - self.advertise_prefix_of_address(*gz_address).await; - - running_zone.add_default_route(*gz_address).map_err( - |err| Error::ZoneCommand { - intent: "Adding Route".to_string(), - err, - }, - )?; + err, + } + })?; + // If this address is in a new ipv6 prefix, notify + // maghemite so it can advertise it to other sleds. + self.advertise_prefix_of_address(*gz_address).await; + + running_zone.add_default_route(*gz_address).map_err( + |err| Error::ZoneCommand { + intent: "Adding Route".to_string(), + err, + }, + )?; - smfh.setprop( - "config/http_address", - format!( - "[{}]:{}", - http_address.ip(), - http_address.port(), - ), - )?; - smfh.setprop( - "config/dns_address", - &format!( - "[{}]:{}", - dns_address.ip(), - dns_address.port(), - ), - )?; + smfh.setprop( + "config/http_address", + format!( + "[{}]:{}", + http_address.ip(), + http_address.port(), + ), + )?; + smfh.setprop( + "config/dns_address", + &format!( + "[{}]:{}", + dns_address.ip(), + dns_address.port(), + ), + )?; - // Refresh the manifest with the new properties we set, so - // they become "effective" properties when the service is - // enabled. - smfh.refresh()?; - } - ServiceType::Oximeter { address } => { - info!(self.inner.log, "Setting up oximeter service"); - smfh.setprop("config/id", request.zone.id)?; - smfh.setprop("config/address", address.to_string())?; - smfh.refresh()?; - } - ServiceType::ManagementGatewayService => { - info!(self.inner.log, "Setting up MGS service"); - smfh.setprop("config/id", request.zone.id)?; - - // Always tell MGS to listen on localhost so wicketd can - // contact it even before we have an underlay network. - smfh.addpropvalue( - "config/address", - &format!("[::1]:{MGS_PORT}"), - )?; + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the + // service is enabled. 
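// The `config/http_address` and `config/dns_address` properties above are
// formatted as `[ip]:port`; `SocketAddrV6`'s `Display` impl produces the
// same bracketed form directly:
use std::net::{Ipv6Addr, SocketAddrV6};

fn smf_socket_addr(ip: Ipv6Addr, port: u16) -> String {
    // E.g. "[fd00::1]:5353"
    SocketAddrV6::new(ip, port, 0, 0).to_string()
}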
+ smfh.refresh()?; + } - if let Some(address) = request.zone.addresses.get(0) { - // Don't use localhost twice - if *address != Ipv6Addr::LOCALHOST { - smfh.addpropvalue( - "config/address", - &format!("[{address}]:{MGS_PORT}"), - )?; - } + OmicronZoneType::Oximeter { address } => { + info!(self.inner.log, "Setting up oximeter service"); + smfh.setprop("config/id", zone_config.zone.id)?; + smfh.setprop("config/address", address.to_string())?; + smfh.refresh()?; } - if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_id", info.rack_id)?; + OmicronZoneType::BoundaryNtp { + ntp_servers, + dns_servers, + domain, + .. } + | OmicronZoneType::InternalNtp { + ntp_servers, + dns_servers, + domain, + .. + } => { + let boundary = matches!( + &zone_config.zone.zone_type, + OmicronZoneType::BoundaryNtp { .. } + ); + info!( + self.inner.log, + "Set up NTP service boundary={}, Servers={:?}", + boundary, + ntp_servers + ); - smfh.refresh()?; - } - ServiceType::SpSim => { - info!(self.inner.log, "Setting up Simulated SP service"); - } - ServiceType::Wicketd { baseboard } => { - info!(self.inner.log, "Setting up wicketd service"); + let sled_info = + if let Some(info) = self.inner.sled_info.get() { + info + } else { + return Err(Error::SledAgentNotReady); + }; - smfh.setprop( - "config/address", - &format!("[::1]:{WICKETD_PORT}"), - )?; + let rack_net = Ipv6Subnet::::new( + sled_info.underlay_address, + ) + .net(); - // If we're launching the switch zone, we'll have a - // bootstrap_address based on our call to - // `self.bootstrap_address_needed` (which always gives us an - // address for the switch zone. If we _don't_ have a - // bootstrap address, someone has requested wicketd in a - // non-switch zone; return an error. - let Some((_, bootstrap_address)) = - bootstrap_name_and_address - else { - return Err(Error::BadServiceRequest { - service: "wicketd".to_string(), - message: concat!( - "missing bootstrap address: ", - "wicketd can only be started in the ", - "switch zone", - ) - .to_string(), - }); - }; - smfh.setprop( - "config/artifact-address", - &format!( - "[{bootstrap_address}]:{BOOTSTRAP_ARTIFACT_PORT}" - ), - )?; - - smfh.setprop( - "config/mgs-address", - &format!("[::1]:{MGS_PORT}"), - )?; - - // We intentionally bind `nexus-proxy-address` to `::` so - // wicketd will serve this on all interfaces, particularly - // the tech port interfaces, allowing external clients to - // connect to this Nexus proxy. 
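// The NTP arm above uses a recurring SMF idiom: clear a multi-valued
// property with a `*` wildcard, re-add the desired values, then `refresh`
// so the running service picks them up. A mock helper showing that call
// shape (method names taken from the surrounding code, bodies invented):
struct MockSmf;

impl MockSmf {
    fn delpropvalue(&self, prop: &str, glob: &str) {
        println!("delpropvalue {prop} {glob}");
    }
    fn addpropvalue(&self, prop: &str, value: &str) {
        println!("addpropvalue {prop} {value}");
    }
    fn refresh(&self) {
        println!("refresh");
    }
}

fn set_ntp_servers(smfh: &MockSmf, servers: &[String]) {
    smfh.delpropvalue("config/server", "*");
    for s in servers {
        smfh.addpropvalue("config/server", s);
    }
    smfh.refresh();
}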
- smfh.setprop( - "config/nexus-proxy-address", - &format!("[::]:{WICKETD_NEXUS_PROXY_PORT}"), - )?; - if let Some(underlay_address) = self - .inner - .sled_info - .get() - .map(|info| info.underlay_address) - { - let rack_subnet = - Ipv6Subnet::::new(underlay_address); + smfh.setprop("config/allow", &format!("{}", rack_net))?; smfh.setprop( - "config/rack-subnet", - &rack_subnet.net().ip().to_string(), + "config/boundary", + if boundary { "true" } else { "false" }, )?; - } - let serialized_baseboard = - serde_json::to_string_pretty(&baseboard)?; - let serialized_baseboard_path = Utf8PathBuf::from(format!( - "{}/opt/oxide/baseboard.json", - running_zone.root() - )); - tokio::fs::write( - &serialized_baseboard_path, - &serialized_baseboard, - ) - .await - .map_err(|err| { - Error::io_path(&serialized_baseboard_path, err) - })?; - smfh.setprop( - "config/baseboard-file", - String::from("/opt/oxide/baseboard.json"), - )?; + if boundary { + // Configure OPTE port for boundary NTP + running_zone + .ensure_address_for_port("public", 0) + .await?; + } - smfh.refresh()?; - } - ServiceType::Dendrite { asic } => { - info!(self.inner.log, "Setting up dendrite service"); + smfh.delpropvalue("config/server", "*")?; + for server in ntp_servers { + smfh.addpropvalue("config/server", server)?; + } + self.configure_dns_client( + &running_zone, + &dns_servers, + &domain, + ) + .await?; - if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_id", info.rack_id)?; - smfh.setprop("config/sled_id", info.config.sled_id)?; - } else { - info!( - self.inner.log, - "no rack_id/sled_id available yet" - ); + smfh.refresh()?; + } + OmicronZoneType::Clickhouse { .. } + | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::CockroachDb { .. } + | OmicronZoneType::Crucible { .. } + | OmicronZoneType::CruciblePantry { .. } => { + panic!( + "{} is a service which exists as part of a \ + self-assembling zone", + &zone_config.zone.zone_type.zone_type_str(), + ) } + }; - smfh.delpropvalue("config/address", "*")?; - smfh.delpropvalue("config/dns_server", "*")?; - for address in &request.zone.addresses { - smfh.addpropvalue( - "config/address", - &format!("[{}]:{}", address, DENDRITE_PORT), - )?; - if *address != Ipv6Addr::LOCALHOST { - let az_prefix = - Ipv6Subnet::::new(*address); - for addr in Resolver::servers_from_subnet(az_prefix) + debug!(self.inner.log, "enabling service"); + smfh.enable()?; + } + ZoneArgs::Switch(request) => { + for service in &request.zone.services { + // TODO: Related to + // https://github.com/oxidecomputer/omicron/pull/1124 , should we + // avoid importing this manifest? + debug!(self.inner.log, "importing manifest"); + + let smfh = SmfHelper::new(&running_zone, service); + smfh.import_manifest()?; + + match service { + SwitchService::ManagementGatewayService => { + info!(self.inner.log, "Setting up MGS service"); + smfh.setprop("config/id", request.zone.id)?; + + // Always tell MGS to listen on localhost so wicketd + // can contact it even before we have an underlay + // network. 
+ smfh.addpropvalue( + "config/address", + &format!("[::1]:{MGS_PORT}"), + )?; + + if let Some(address) = request.zone.addresses.get(0) { - smfh.addpropvalue( - "config/dns_server", - &format!("{addr}"), - )?; + // Don't use localhost twice + if *address != Ipv6Addr::LOCALHOST { + smfh.addpropvalue( + "config/address", + &format!("[{address}]:{MGS_PORT}"), + )?; + } + } + + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop("config/rack_id", info.rack_id)?; } + + smfh.refresh()?; } - } - match asic { - DendriteAsic::TofinoAsic => { - // There should be exactly one device_name - // associated with this zone: the /dev path for - // the tofino ASIC. - let dev_cnt = device_names.len(); - if dev_cnt == 1 { + SwitchService::SpSim => { + info!( + self.inner.log, + "Setting up Simulated SP service" + ); + } + SwitchService::Wicketd { baseboard } => { + info!(self.inner.log, "Setting up wicketd service"); + + smfh.setprop( + "config/address", + &format!("[::1]:{WICKETD_PORT}"), + )?; + + // If we're launching the switch zone, we'll have a + // bootstrap_address based on our call to + // `self.bootstrap_address_needed` (which always + // gives us an address for the switch zone. If we + // _don't_ have a bootstrap address, someone has + // requested wicketd in a non-switch zone; return an + // error. + let Some((_, bootstrap_address)) = + bootstrap_name_and_address + else { + return Err(Error::BadServiceRequest { + service: "wicketd".to_string(), + message: concat!( + "missing bootstrap address: ", + "wicketd can only be started in the ", + "switch zone", + ) + .to_string(), + }); + }; + smfh.setprop( + "config/artifact-address", + &format!( + "[{bootstrap_address}]:{BOOTSTRAP_ARTIFACT_PORT}" + ), + )?; + + smfh.setprop( + "config/mgs-address", + &format!("[::1]:{MGS_PORT}"), + )?; + + // We intentionally bind `nexus-proxy-address` to + // `::` so wicketd will serve this on all + // interfaces, particularly the tech port + // interfaces, allowing external clients to connect + // to this Nexus proxy. 
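// The wicketd arm above guards with `let ... else`: services that only
// make sense in the switch zone fail fast when no bootstrap address was
// allocated. The bare shape of that pattern:
use std::net::Ipv6Addr;

fn require_bootstrap(
    bootstrap: Option<(String, Ipv6Addr)>,
) -> Result<Ipv6Addr, String> {
    let Some((_vnic_name, addr)) = bootstrap else {
        return Err("wicketd can only be started in the switch zone".into());
    };
    Ok(addr)
}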
+ smfh.setprop( + "config/nexus-proxy-address", + &format!("[::]:{WICKETD_NEXUS_PROXY_PORT}"), + )?; + if let Some(underlay_address) = self + .inner + .sled_info + .get() + .map(|info| info.underlay_address) + { + let rack_subnet = Ipv6Subnet::::new( + underlay_address, + ); smfh.setprop( - "config/dev_path", - device_names[0].clone(), + "config/rack-subnet", + &rack_subnet.net().ip().to_string(), )?; - } else { - return Err(Error::SledLocalZone( - anyhow::anyhow!( - "{dev_cnt} devices needed for tofino asic" - ), - )); } + + let serialized_baseboard = + serde_json::to_string_pretty(&baseboard)?; + let serialized_baseboard_path = + Utf8PathBuf::from(format!( + "{}/opt/oxide/baseboard.json", + running_zone.root() + )); + tokio::fs::write( + &serialized_baseboard_path, + &serialized_baseboard, + ) + .await + .map_err(|err| { + Error::io_path(&serialized_baseboard_path, err) + })?; smfh.setprop( - "config/port_config", - "/opt/oxide/dendrite/misc/sidecar_config.toml", + "config/baseboard-file", + String::from("/opt/oxide/baseboard.json"), )?; - let sidecar_revision = - match self.inner.sidecar_revision { - SidecarRevision::Physical(ref rev) => rev, - _ => { - return Err(Error::SidecarRevision( - anyhow::anyhow!( - "expected physical sidecar revision" - ), - )) - } - }; - smfh.setprop("config/board_rev", sidecar_revision)?; + + smfh.refresh()?; } - DendriteAsic::TofinoStub => smfh.setprop( - "config/port_config", - "/opt/oxide/dendrite/misc/model_config.toml", - )?, - asic @ (DendriteAsic::SoftNpuZone - | DendriteAsic::SoftNpuPropolisDevice) => { - if asic == &DendriteAsic::SoftNpuZone { - smfh.setprop("config/mgmt", "uds")?; + SwitchService::Dendrite { asic } => { + info!( + self.inner.log, + "Setting up dendrite service" + ); + + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop("config/rack_id", info.rack_id)?; smfh.setprop( - "config/uds_path", - "/opt/softnpu/stuff", + "config/sled_id", + info.config.sled_id, )?; + } else { + info!( + self.inner.log, + "no rack_id/sled_id available yet" + ); } - if asic == &DendriteAsic::SoftNpuPropolisDevice { - smfh.setprop("config/mgmt", "uart")?; + + smfh.delpropvalue("config/address", "*")?; + smfh.delpropvalue("config/dns_server", "*")?; + for address in &request.zone.addresses { + smfh.addpropvalue( + "config/address", + &format!("[{}]:{}", address, DENDRITE_PORT), + )?; + if *address != Ipv6Addr::LOCALHOST { + let az_prefix = + Ipv6Subnet::::new(*address); + for addr in + Resolver::servers_from_subnet(az_prefix) + { + smfh.addpropvalue( + "config/dns_server", + &format!("{addr}"), + )?; + } + } } - let s = match self.inner.sidecar_revision { - SidecarRevision::SoftZone(ref s) => s, - SidecarRevision::SoftPropolis(ref s) => s, - _ => { - return Err(Error::SidecarRevision( - anyhow::anyhow!( - "expected soft sidecar revision" - ), - )) + match asic { + DendriteAsic::TofinoAsic => { + // There should be exactly one device_name + // associated with this zone: the /dev path + // for the tofino ASIC. 
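// Sketch of the baseboard hand-off above: serialize to pretty JSON, write
// it under the zone's root, and point the SMF property at the in-zone
// path. `zone_root` stands in for `running_zone.root()`.
use camino::Utf8PathBuf;

async fn write_baseboard<T: serde::Serialize>(
    zone_root: &str,
    baseboard: &T,
) -> Result<Utf8PathBuf, Box<dyn std::error::Error>> {
    let json = serde_json::to_string_pretty(baseboard)?;
    let path =
        Utf8PathBuf::from(format!("{zone_root}/opt/oxide/baseboard.json"));
    tokio::fs::write(&path, json).await?;
    Ok(path)
}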
+ let dev_cnt = device_names.len(); + if dev_cnt == 1 { + smfh.setprop( + "config/dev_path", + device_names[0].clone(), + )?; + } else { + return Err(Error::SledLocalZone( + anyhow::anyhow!( + "{dev_cnt} devices needed \ + for tofino asic" + ), + )); + } + smfh.setprop( + "config/port_config", + "/opt/oxide/dendrite/misc/sidecar_config.toml", + )?; + let sidecar_revision = + match self.inner.sidecar_revision { + SidecarRevision::Physical(ref rev) => rev, + _ => { + return Err(Error::SidecarRevision( + anyhow::anyhow!( + "expected physical \ + sidecar revision" + ), + )) + } + }; + smfh.setprop("config/board_rev", sidecar_revision)?; + } + DendriteAsic::TofinoStub => smfh.setprop( + "config/port_config", + "/opt/oxide/dendrite/misc/model_config.toml", + )?, + asic @ (DendriteAsic::SoftNpuZone + | DendriteAsic::SoftNpuPropolisDevice) => { + if asic == &DendriteAsic::SoftNpuZone { + smfh.setprop("config/mgmt", "uds")?; + smfh.setprop( + "config/uds_path", + "/opt/softnpu/stuff", + )?; + } + if asic == &DendriteAsic::SoftNpuPropolisDevice { + smfh.setprop("config/mgmt", "uart")?; + } + let s = match self.inner.sidecar_revision { + SidecarRevision::SoftZone(ref s) => s, + SidecarRevision::SoftPropolis(ref s) => s, + _ => { + return Err(Error::SidecarRevision( + anyhow::anyhow!( + "expected soft sidecar \ + revision" + ), + )) + } + }; + smfh.setprop( + "config/front_ports", + &s.front_port_count.to_string(), + )?; + smfh.setprop( + "config/rear_ports", + &s.rear_port_count.to_string(), + )?; + smfh.setprop( + "config/port_config", + "/opt/oxide/dendrite/misc/softnpu_single_sled_config.toml", + )? } }; - smfh.setprop( - "config/front_ports", - &s.front_port_count.to_string(), - )?; - smfh.setprop( - "config/rear_ports", - &s.rear_port_count.to_string(), - )?; - smfh.setprop( - "config/port_config", - "/opt/oxide/dendrite/misc/softnpu_single_sled_config.toml", - )? + smfh.refresh()?; } - }; - smfh.refresh()?; - } - ServiceType::Tfport { pkt_source, asic } => { - info!(self.inner.log, "Setting up tfport service"); + SwitchService::Tfport { pkt_source, asic } => { + info!(self.inner.log, "Setting up tfport service"); - let is_gimlet = is_gimlet().map_err(|e| { - Error::Underlay(underlay::Error::SystemDetection(e)) - })?; + let is_gimlet = is_gimlet().map_err(|e| { + Error::Underlay( + underlay::Error::SystemDetection(e), + ) + })?; - if is_gimlet { - // Collect the prefixes for each techport. - let techport_prefixes = match bootstrap_name_and_address - .as_ref() - { - Some((_, addr)) => { - Self::bootstrap_addr_to_techport_prefixes(addr) + if is_gimlet { + // Collect the prefixes for each techport. + let nameaddr = + bootstrap_name_and_address.as_ref(); + let techport_prefixes = match nameaddr { + Some((_, addr)) => { + Self::bootstrap_addr_to_techport_prefixes(addr) + } + None => { + return Err(Error::BadServiceRequest { + service: "tfport".into(), + message: "bootstrap addr missing" + .into(), + }); + } + }; + + for (i, prefix) in + techport_prefixes.into_iter().enumerate() + { + // Each `prefix` is an `Ipv6Subnet` + // including a netmask. Stringify just the + // network address, without the mask. 
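// The tfport plumbing below numbers each techport prefix into its own SMF
// property name; the enumerate-and-format step, with plain strings
// standing in for the stringified `Ipv6Subnet` network addresses:
fn techport_props(prefixes: &[String]) -> Vec<(String, String)> {
    prefixes
        .iter()
        .enumerate()
        .map(|(i, p)| (format!("config/techport{i}_prefix"), p.clone()))
        .collect()
}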
+ smfh.setprop( + format!("config/techport{i}_prefix"), + prefix.net().network().to_string(), + )?; + } + smfh.setprop("config/pkt_source", pkt_source)?; } - None => { - return Err(Error::BadServiceRequest { - service: "tfport".into(), - message: "bootstrap addr missing".into(), - }); + if asic == &DendriteAsic::SoftNpuZone { + smfh.setprop("config/flags", "--sync-only")?; + } + if asic == &DendriteAsic::SoftNpuPropolisDevice { + smfh.setprop("config/pkt_source", pkt_source)?; } - }; - - for (i, prefix) in - techport_prefixes.into_iter().enumerate() - { - // Each `prefix` is an `Ipv6Subnet` including a netmask. - // Stringify just the network address, without the mask. smfh.setprop( - format!("config/techport{i}_prefix"), - prefix.net().network().to_string(), + "config/host", + &format!("[{}]", Ipv6Addr::LOCALHOST), + )?; + smfh.setprop( + "config/port", + &format!("{}", DENDRITE_PORT), )?; - } - smfh.setprop("config/pkt_source", pkt_source)?; - } - if asic == &DendriteAsic::SoftNpuZone { - smfh.setprop("config/flags", "--sync-only")?; - } - if asic == &DendriteAsic::SoftNpuPropolisDevice { - smfh.setprop("config/pkt_source", pkt_source)?; - } - smfh.setprop( - "config/host", - &format!("[{}]", Ipv6Addr::LOCALHOST), - )?; - smfh.setprop("config/port", &format!("{}", DENDRITE_PORT))?; - - smfh.refresh()?; - } - ServiceType::BoundaryNtp { - ntp_servers, - dns_servers, - domain, - .. - } - | ServiceType::InternalNtp { - ntp_servers, - dns_servers, - domain, - .. - } => { - let boundary = matches!( - service.details, - ServiceType::BoundaryNtp { .. } - ); - info!( - self.inner.log, - "Set up NTP service boundary={}, Servers={:?}", - boundary, - ntp_servers - ); - - let sled_info = - if let Some(info) = self.inner.sled_info.get() { - info - } else { - return Err(Error::SledAgentNotReady); - }; - - let rack_net = Ipv6Subnet::::new( - sled_info.underlay_address, - ) - .net(); - - smfh.setprop("config/allow", &format!("{}", rack_net))?; - smfh.setprop( - "config/boundary", - if boundary { "true" } else { "false" }, - )?; - - if boundary { - // Configure OPTE port for boundary NTP - running_zone - .ensure_address_for_port("public", 0) - .await?; - } - - smfh.delpropvalue("config/server", "*")?; - for server in ntp_servers { - smfh.addpropvalue("config/server", server)?; - } - self.configure_dns_client( - &running_zone, - dns_servers, - &domain, - ) - .await?; - - smfh.refresh()?; - } - ServiceType::Uplink => { - // Nothing to do here - this service is special and - // configured in `ensure_switch_zone_uplinks_configured` - } - ServiceType::Mgd => { - info!(self.inner.log, "Setting up mgd service"); - smfh.setprop("config/admin_host", "::")?; - smfh.refresh()?; - } - ServiceType::MgDdm { mode } => { - info!(self.inner.log, "Setting up mg-ddm service"); - smfh.setprop("config/mode", &mode)?; - smfh.setprop("config/admin_host", "::")?; + smfh.refresh()?; + } + SwitchService::Uplink => { + // Nothing to do here - this service is special and + // configured in + // `ensure_switch_zone_uplinks_configured` + } + SwitchService::Mgd => { + info!(self.inner.log, "Setting up mgd service"); + smfh.setprop("config/admin_host", "::")?; + smfh.refresh()?; + } + SwitchService::MgDdm { mode } => { + info!(self.inner.log, "Setting up mg-ddm service"); - let is_gimlet = is_gimlet().map_err(|e| { - Error::Underlay(underlay::Error::SystemDetection(e)) - })?; + smfh.setprop("config/mode", &mode)?; + smfh.setprop("config/admin_host", "::")?; - let maghemite_interfaces: Vec = if is_gimlet { - (0..32) - .map(|i| { - // See 
the `tfport_name` function for how - // tfportd names the addrconf it creates. - // Right now, that's `tfportrear[0-31]_0` - // for all rear ports, which is what we're - // directing ddmd to listen for - // advertisements on. - // - // This may grow in a multi-rack future to - // include a subset of "front" ports too, - // when racks are cabled together. - AddrObject::new( - &format!("tfportrear{}_0", i), - IPV6_LINK_LOCAL_NAME, - ) - .unwrap() - }) - .collect() - } else { - self.inner - .switch_zone_maghemite_links - .iter() - .map(|i| { - AddrObject::new( - &i.to_string(), - IPV6_LINK_LOCAL_NAME, + let is_gimlet = is_gimlet().map_err(|e| { + Error::Underlay( + underlay::Error::SystemDetection(e), ) - .unwrap() - }) - .collect() - }; + })?; + + let maghemite_interfaces: Vec = + if is_gimlet { + (0..32) + .map(|i| { + // See the `tfport_name` function + // for how tfportd names the + // addrconf it creates. Right now, + // that's `tfportrear[0-31]_0` for + // all rear ports, which is what + // we're directing ddmd to listen + // for advertisements on. + // + // This may grow in a multi-rack + // future to include a subset of + // "front" ports too, when racks are + // cabled together. + AddrObject::new( + &format!("tfportrear{}_0", i), + IPV6_LINK_LOCAL_NAME, + ) + .unwrap() + }) + .collect() + } else { + self.inner + .switch_zone_maghemite_links + .iter() + .map(|i| { + AddrObject::new( + &i.to_string(), + IPV6_LINK_LOCAL_NAME, + ) + .unwrap() + }) + .collect() + }; - smfh.setprop( - "config/interfaces", - // `svccfg setprop` requires a list of values to be - // enclosed in `()`, and each string value to be - // enclosed in `""`. Note that we do _not_ need to - // escape the parentheses, since this is not passed - // through a shell, but directly to `exec(2)` in the - // zone. - format!( - "({})", - maghemite_interfaces - .iter() - .map(|interface| format!(r#""{}""#, interface)) - .join(" "), - ), - )?; + smfh.setprop( + "config/interfaces", + // `svccfg setprop` requires a list of values to + // be enclosed in `()`, and each string value to + // be enclosed in `""`. Note that we do _not_ + // need to escape the parentheses, since this is + // not passed through a shell, but directly to + // `exec(2)` in the zone. + format!( + "({})", + maghemite_interfaces + .iter() + .map(|interface| format!( + r#""{}""#, + interface + )) + .join(" "), + ), + )?; - if is_gimlet { - // Ddm for a scrimlet needs to be configured to talk to - // dendrite - smfh.setprop("config/dpd_host", "[::1]")?; - smfh.setprop("config/dpd_port", DENDRITE_PORT)?; + if is_gimlet { + // Ddm for a scrimlet needs to be configured to + // talk to dendrite + smfh.setprop("config/dpd_host", "[::1]")?; + smfh.setprop("config/dpd_port", DENDRITE_PORT)?; + } + smfh.setprop("config/dendrite", "true")?; + + smfh.refresh()?; + } } - smfh.setprop("config/dendrite", "true")?; - smfh.refresh()?; - } - ServiceType::Crucible { .. } - | ServiceType::CruciblePantry { .. } - | ServiceType::CockroachDb { .. } - | ServiceType::Clickhouse { .. } - | ServiceType::ClickhouseKeeper { .. } => { - panic!( - "{} is a service which exists as part of a self-assembling zone", - service.details, - ) + debug!(self.inner.log, "enabling service"); + smfh.enable()?; } } - - debug!(self.inner.log, "enabling service"); - smfh.enable()?; - } + }; Ok(running_zone) } // Populates `existing_zones` according to the requests in `services`. 
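// The `config/interfaces` value set above must be an svccfg list literal:
// the whole list in parentheses, each element double-quoted. A sketch of
// that formatting without the `itertools` dependency:
fn svccfg_list(values: &[String]) -> String {
    let quoted: Vec<String> =
        values.iter().map(|v| format!(r#""{v}""#)).collect();
    // e.g. svccfg_list(&["a".into(), "b".into()]) == r#"("a" "b")"#
    format!("({})", quoted.join(" "))
}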
- async fn initialize_services_locked( + async fn initialize_omicron_zones_locked( &self, existing_zones: &mut BTreeMap, - requests: &Vec, + requests: &Vec, ) -> Result<(), Error> { if let Some(name) = requests .iter() @@ -2098,7 +2575,7 @@ impl ServiceManager { let futures = requests.iter().map(|request| { async move { self.initialize_zone( - request, + ZoneArgs::Omicron(request), // filesystems= &[], // data_links= @@ -2156,74 +2633,133 @@ impl ServiceManager { Err(BundleError::NoSuchZone { name: name.to_string() }) } - /// Ensures that particular services should be initialized. + /// Returns the current Omicron zone configuration + pub async fn omicron_zones_list( + &self, + ) -> Result { + let log = &self.inner.log; + + // We need to take the lock in order for the information in the ledger + // to be up-to-date. + let _existing_zones = self.inner.zones.lock().await; + + // Read the existing set of services from the ledger. + let zone_ledger_paths = self.all_omicron_zone_ledgers().await; + let ledger_data = match Ledger::::new( + log, + zone_ledger_paths.clone(), + ) + .await + { + Some(ledger) => ledger.data().clone(), + None => OmicronZonesConfigLocal::initial(), + }; + + Ok(ledger_data.to_omicron_zones_config()) + } + + /// Ensures that particular Omicron zones are running /// /// These services will be instantiated by this function, and will be /// recorded to a local file to ensure they start automatically on next /// boot. - pub async fn ensure_all_services_persistent( + pub async fn ensure_all_omicron_zones_persistent( &self, - request: ServiceEnsureBody, + request: OmicronZonesConfig, ) -> Result<(), Error> { let log = &self.inner.log; let mut existing_zones = self.inner.zones.lock().await; // Read the existing set of services from the ledger. - let service_paths = self.all_service_ledgers().await; - let mut ledger = - match Ledger::::new(log, service_paths.clone()) - .await - { - Some(ledger) => ledger, - None => Ledger::::new_with( - log, - service_paths.clone(), - AllZoneRequests::default(), - ), - }; - let ledger_zone_requests = ledger.data_mut(); + let zone_ledger_paths = self.all_omicron_zone_ledgers().await; + let mut ledger = match Ledger::::new( + log, + zone_ledger_paths.clone(), + ) + .await + { + Some(ledger) => ledger, + None => Ledger::::new_with( + log, + zone_ledger_paths.clone(), + OmicronZonesConfigLocal::initial(), + ), + }; + + let ledger_zone_config = ledger.data_mut(); + debug!(log, "ensure_all_omicron_zones_persistent"; + "request_generation" => request.generation.to_string(), + "ledger_generation" => + ledger_zone_config.omicron_generation.to_string(), + ); + + // Absolutely refuse to downgrade the configuration. + if ledger_zone_config.omicron_generation > request.generation { + return Err(Error::RequestedConfigOutdated { + requested: request.generation, + current: ledger_zone_config.omicron_generation, + }); + } - let mut zone_requests = self - .ensure_all_services( + // If the generation is the same as what we're running, but the contents + // aren't, that's a problem, too. 
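// The two ledger guards above reduce to comparisons on generation
// numbers: never accept an older generation, and reject a same-generation
// request whose contents differ. Sketch with `u64` standing in for
// `Generation`:
fn check_generation(
    current: u64,
    requested: u64,
    contents_match: bool,
) -> Result<(), String> {
    if requested < current {
        return Err(format!(
            "requested generation {requested} is older than current {current}"
        ));
    }
    if requested == current && !contents_match {
        return Err(format!(
            "requested generation {requested} conflicts with ledgered contents"
        ));
    }
    Ok(())
}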
+ if ledger_zone_config.omicron_generation == request.generation + && ledger_zone_config.clone().to_omicron_zones_config().zones + != request.zones + { + return Err(Error::RequestedConfigConflicts(request.generation)); + } + + let new_config = self + .ensure_all_omicron_zones( &mut existing_zones, - ledger_zone_requests, + Some(ledger_zone_config), request, + |_| true, ) .await?; - // Update the services in the ledger and write it back to both M.2s - ledger_zone_requests.requests.clear(); - ledger_zone_requests.requests.append(&mut zone_requests.requests); + // Update the zones in the ledger and write it back to both M.2s + *ledger_zone_config = new_config; ledger.commit().await?; Ok(()) } - // Ensures that only the following services are running. + // Ensures that only the following Omicron zones are running. // // Does not record any information such that these services are // re-instantiated on boot. - async fn ensure_all_services( + async fn ensure_all_omicron_zones( &self, + // The MutexGuard here attempts to ensure that the caller has the right + // lock held when calling this function. existing_zones: &mut MutexGuard<'_, BTreeMap>, - old_request: &AllZoneRequests, - request: ServiceEnsureBody, - ) -> Result { + old_config: Option<&OmicronZonesConfigLocal>, + new_request: OmicronZonesConfig, + filter: F, + ) -> Result + where + F: Fn(&OmicronZoneConfig) -> bool, + { let log = &self.inner.log; // Do some data-normalization to ensure we can compare the "requested // set" vs the "existing set" as HashSets. - let old_services_set: HashSet = HashSet::from_iter( - old_request.requests.iter().map(|r| r.zone.clone()), - ); - let requested_services_set = - HashSet::from_iter(request.services.into_iter()); + let old_zones_set: HashSet = old_config + .map(|old_config| { + HashSet::from_iter( + old_config.zones.iter().map(|z| z.zone.clone()), + ) + }) + .unwrap_or_else(HashSet::new); + let requested_zones_set = + HashSet::from_iter(new_request.zones.into_iter().filter(filter)); let zones_to_be_removed = - old_services_set.difference(&requested_services_set); - let zones_to_be_added = - requested_services_set.difference(&old_services_set); + old_zones_set.difference(&requested_zones_set); + let zones_to_be_added = requested_zones_set.difference(&old_zones_set); // Destroy zones that should not be running for zone in zones_to_be_removed { @@ -2256,13 +2792,13 @@ impl ServiceManager { } // Create zones that should be running - let mut zone_requests = AllZoneRequests::default(); let all_u2_roots = self .inner .storage .get_latest_resources() .await .all_u2_mountpoints(ZONE_DATASET); + let mut new_zones = Vec::new(); for zone in zones_to_be_added { // Check if we think the zone should already be running let name = zone.zone_name(); @@ -2294,6 +2830,7 @@ impl ServiceManager { } } } + // For each new zone request, we pick an arbitrary U.2 to store // the zone filesystem. Note: This isn't known to Nexus right now, // so it's a local-to-sled decision. @@ -2306,22 +2843,27 @@ impl ServiceManager { .ok_or_else(|| Error::U2NotFound)? 
.clone(); - zone_requests - .requests - .push(ZoneRequest { zone: zone.clone(), root }); + new_zones.push(OmicronZoneConfigLocal { zone: zone.clone(), root }); } - self.initialize_services_locked( - existing_zones, - &zone_requests.requests, - ) - .await?; - for old_zone in &old_request.requests { - if requested_services_set.contains(&old_zone.zone) { - zone_requests.requests.push(old_zone.clone()); + self.initialize_omicron_zones_locked(existing_zones, &new_zones) + .await?; + + if let Some(old_config) = old_config { + for old_zone in &old_config.zones { + if requested_zones_set.contains(&old_zone.zone) { + new_zones.push(old_zone.clone()); + } } } - Ok(zone_requests) + + Ok(OmicronZonesConfigLocal { + omicron_generation: new_request.generation, + ledger_generation: old_config + .map(|c| c.ledger_generation) + .unwrap_or_else(Generation::new), + zones: new_zones, + }) } pub async fn cockroachdb_initialize(&self) -> Result<(), Error> { @@ -2527,7 +3069,8 @@ impl ServiceManager { let mut data_links: Vec = vec![]; let services = match self.inner.sled_mode { - // A pure gimlet sled should not be trying to activate a switch zone. + // A pure gimlet sled should not be trying to activate a switch + // zone. SledMode::Gimlet => { return Err(Error::SledLocalZone(anyhow::anyhow!( "attempted to activate switch zone on non-scrimlet sled" @@ -2538,16 +3081,16 @@ impl ServiceManager { SledMode::Auto | SledMode::Scrimlet { asic: DendriteAsic::TofinoAsic } => { vec![ - ServiceType::Dendrite { asic: DendriteAsic::TofinoAsic }, - ServiceType::ManagementGatewayService, - ServiceType::Tfport { + SwitchService::Dendrite { asic: DendriteAsic::TofinoAsic }, + SwitchService::ManagementGatewayService, + SwitchService::Tfport { pkt_source: "tfpkt0".to_string(), asic: DendriteAsic::TofinoAsic, }, - ServiceType::Uplink, - ServiceType::Wicketd { baseboard }, - ServiceType::Mgd, - ServiceType::MgDdm { mode: "transit".to_string() }, + SwitchService::Uplink, + SwitchService::Wicketd { baseboard }, + SwitchService::Mgd, + SwitchService::MgDdm { mode: "transit".to_string() }, ] } @@ -2556,17 +3099,17 @@ impl ServiceManager { } => { data_links = vec!["vioif0".to_owned()]; vec![ - ServiceType::Dendrite { asic }, - ServiceType::ManagementGatewayService, - ServiceType::Uplink, - ServiceType::Wicketd { baseboard }, - ServiceType::Mgd, - ServiceType::MgDdm { mode: "transit".to_string() }, - ServiceType::Tfport { + SwitchService::Dendrite { asic }, + SwitchService::ManagementGatewayService, + SwitchService::Uplink, + SwitchService::Wicketd { baseboard }, + SwitchService::Mgd, + SwitchService::MgDdm { mode: "transit".to_string() }, + SwitchService::Tfport { pkt_source: "vioif0".to_string(), asic, }, - ServiceType::SpSim, + SwitchService::SpSim, ] } @@ -2586,17 +3129,17 @@ impl ServiceManager { data_links = Dladm::get_simulated_tfports()?; } vec![ - ServiceType::Dendrite { asic }, - ServiceType::ManagementGatewayService, - ServiceType::Uplink, - ServiceType::Wicketd { baseboard }, - ServiceType::Mgd, - ServiceType::MgDdm { mode: "transit".to_string() }, - ServiceType::Tfport { + SwitchService::Dendrite { asic }, + SwitchService::ManagementGatewayService, + SwitchService::Uplink, + SwitchService::Wicketd { baseboard }, + SwitchService::Mgd, + SwitchService::MgDdm { mode: "transit".to_string() }, + SwitchService::Tfport { pkt_source: "tfpkt0".to_string(), asic, }, - ServiceType::SpSim, + SwitchService::SpSim, ] } }; @@ -2605,19 +3148,10 @@ impl ServiceManager { if let Some((ip, _)) = underlay_info { vec![ip] } else { vec![] }; 
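// The zone reconciliation above is plain set arithmetic: anything in the
// old set but absent from the request is shut down, anything new is
// started, and zones present in both carry over. Sketch over zone names:
use std::collections::HashSet;

fn plan<'a>(
    old: &'a HashSet<String>,
    requested: &'a HashSet<String>,
) -> (Vec<&'a String>, Vec<&'a String>) {
    let to_remove: Vec<&String> = old.difference(requested).collect();
    let to_add: Vec<&String> = requested.difference(old).collect();
    (to_remove, to_add)
}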
addresses.push(Ipv6Addr::LOCALHOST); - let request = ServiceZoneRequest { - id: Uuid::new_v4(), - zone_type: ZoneType::Switch, - addresses, - dataset: None, - services: services - .into_iter() - .map(|s| ServiceZoneService { id: Uuid::new_v4(), details: s }) - .collect(), - }; + let request = + SwitchZoneConfig { id: Uuid::new_v4(), addresses, services }; self.ensure_zone( - ZoneType::Switch, // request= Some(request), // filesystems= @@ -2685,7 +3219,7 @@ impl ServiceManager { } }; - let smfh = SmfHelper::new(&zone, &ServiceType::Uplink); + let smfh = SmfHelper::new(&zone, &SwitchService::Uplink); // We want to delete all the properties in the `uplinks` group, but we // don't know their names, so instead we'll delete and recreate the @@ -2710,7 +3244,6 @@ impl ServiceManager { /// Ensures that no switch zone is active. pub async fn deactivate_switch(&self) -> Result<(), Error> { self.ensure_zone( - ZoneType::Switch, // request= None, // filesystems= @@ -2727,12 +3260,11 @@ impl ServiceManager { fn start_zone( self, zone: &mut SledLocalZone, - request: ServiceZoneRequest, + request: SwitchZoneConfig, filesystems: Vec, data_links: Vec, ) { let (exit_tx, exit_rx) = oneshot::channel(); - let zone_type = request.zone_type.clone(); *zone = SledLocalZone::Initializing { request, filesystems, @@ -2740,7 +3272,7 @@ impl ServiceManager { worker: Some(Task { exit_tx, initializer: tokio::task::spawn(async move { - self.initialize_zone_loop(zone_type, exit_rx).await + self.initialize_zone_loop(exit_rx).await }), }), }; @@ -2749,21 +3281,14 @@ impl ServiceManager { // Moves the current state to align with the "request". async fn ensure_zone( &self, - zone_type: ZoneType, - request: Option, + request: Option, filesystems: Vec, data_links: Vec, ) -> Result<(), Error> { let log = &self.inner.log; - let mut sled_zone; - match zone_type { - ZoneType::Switch => { - sled_zone = self.inner.switch_zone.lock().await; - } - _ => panic!("Unhandled zone type"), - } - let zone_typestr = zone_type.to_string(); + let mut sled_zone = self.inner.switch_zone.lock().await; + let zone_typestr = "switch"; match (&mut *sled_zone, request) { (SledLocalZone::Disabled, Some(request)) => { @@ -2832,10 +3357,10 @@ impl ServiceManager { } for service in &request.services { - let smfh = SmfHelper::new(&zone, &service.details); + let smfh = SmfHelper::new(&zone, service); - match &service.details { - ServiceType::ManagementGatewayService => { + match service { + SwitchService::ManagementGatewayService => { // Remove any existing `config/address` values // without deleting the property itself. smfh.delpropvalue("config/address", "*")?; @@ -2853,8 +3378,9 @@ impl ServiceManager { &format!("[{address}]:{MGS_PORT}"), )?; - // It should be impossible for the `sled_info` not to be set here, - // as the underlay is set at the same time. + // It should be impossible for the `sled_info` not + // to be set here, as the underlay is set at the + // same time. if let Some(info) = self.inner.sled_info.get() { smfh.setprop("config/rack_id", info.rack_id)?; } else { @@ -2869,7 +3395,7 @@ impl ServiceManager { smfh.refresh()?; } - ServiceType::Dendrite { .. } => { + SwitchService::Dendrite { .. } => { info!(self.inner.log, "configuring dendrite zone"); if let Some(info) = self.inner.sled_info.get() { smfh.setprop("config/rack_id", info.rack_id)?; @@ -2905,7 +3431,7 @@ impl ServiceManager { } smfh.refresh()?; } - ServiceType::Wicketd { .. } => { + SwitchService::Wicketd { .. 
} => {
 if let Some(&address) = first_address {
 let rack_subnet =
 Ipv6Subnet::<AZ_PREFIX>::new(address);
@@ -2928,15 +3454,16 @@ impl ServiceManager {
 );
 }
 }
- ServiceType::Tfport { .. } => {
+ SwitchService::Tfport { .. } => {
 // Since tfport and dpd communicate using localhost,
- // the tfport service shouldn't need to be restarted.
+ // the tfport service shouldn't need to be
+ // restarted.
 }
- ServiceType::Uplink { .. } => {
+ SwitchService::Uplink { .. } => {
 // Only configured in
 // `ensure_switch_zone_uplinks_configured`
 }
- ServiceType::MgDdm { mode } => {
+ SwitchService::MgDdm { mode } => {
 smfh.delpropvalue("config/mode", "*")?;
 smfh.addpropvalue("config/mode", &mode)?;
 smfh.refresh()?;
@@ -2985,52 +3512,28 @@ impl ServiceManager {
 // switch zone were on a U.2 device we would not be able to run RSS, as
 // we could not create the U.2 disks due to lack of encryption. To break
 // the cycle we put the switch zone root fs on the ramdisk.
- let root = if request.zone_type == ZoneType::Switch {
- Utf8PathBuf::from(ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT)
- } else {
- let all_u2_roots = self
- .inner
- .storage
- .get_latest_resources()
- .await
- .all_u2_mountpoints(ZONE_DATASET);
- let mut rng = rand::rngs::StdRng::from_entropy();
- all_u2_roots
- .choose(&mut rng)
- .ok_or_else(|| Error::U2NotFound)?
- .clone()
- };
-
- let request = ZoneRequest { zone: request.clone(), root };
+ let root = Utf8PathBuf::from(ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT);
+ let zone_request =
+ SwitchZoneConfigLocal { root, zone: request.clone() };
+ let zone_args = ZoneArgs::Switch(&zone_request);
 let zone =
- self.initialize_zone(&request, filesystems, data_links).await?;
- *sled_zone =
- SledLocalZone::Running { request: request.zone.clone(), zone };
+ self.initialize_zone(zone_args, filesystems, data_links).await?;
+ *sled_zone = SledLocalZone::Running { request: request.clone(), zone };
 Ok(())
 }

 // Body of a tokio task responsible for running until the switch zone is
 // initialized, or it has been told to stop.
- async fn initialize_zone_loop(
- &self,
- zone_type: ZoneType,
- mut exit_rx: oneshot::Receiver<()>,
- ) {
+ async fn initialize_zone_loop(&self, mut exit_rx: oneshot::Receiver<()>) {
 loop {
 {
- let mut sled_zone;
- match zone_type {
- ZoneType::Switch => {
- sled_zone = self.inner.switch_zone.lock().await;
- }
- _ => panic!("Unhandled zone type"),
- }
+ let mut sled_zone = self.inner.switch_zone.lock().await;
 match self.try_initialize_sled_local_zone(&mut sled_zone).await {
 Ok(()) => return,
 Err(e) => warn!(
 self.inner.log,
- "Failed to initialize {zone_type}: {e}"
+ "Failed to initialize switch zone: {e}"
 ),
 }
 }
@@ -3050,7 +3553,6 @@ impl ServiceManager {
 #[cfg(test)]
 mod test {
 use super::*;
- use crate::params::{ServiceZoneService, ZoneType};
 use illumos_utils::zpool::ZpoolName;
 use illumos_utils::{
 dladm::{
@@ -3154,27 +3656,96 @@ mod test {
 ]
 }

+ // Configures our mock implementations to work for cases where we configure
+ // multiple zones in one `ensure_all_omicron_zones_persistent()` call.
+ //
+ // This is looser than the expectations created by ensure_new_service()
+ // because these functions may return any number of times.
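For readers unfamiliar with this mocking style: `MockZones` and friends come from mockall, and mocking an associated (static) function goes through a per-function "context" guard that must stay alive while the code under test runs, which is why the helper below hands its contexts back to the caller boxed. A minimal sketch, assuming a mockall `#[automock]` trait (the real wrappers take `&str` and more arguments):

```rust
use mockall::automock;

// Toy stand-in for the illumos_utils zone wrappers; the real trait has
// more methods and different signatures.
#[automock]
trait Zones {
    fn boot(name: String) -> Result<(), String>;
}

fn main() {
    // The context guard scopes the expectation: once `ctx` drops, calls
    // to MockZones::boot become unexpected again.
    let ctx = MockZones::boot_context();
    ctx.expect().returning(|name| {
        assert!(name.starts_with("oxz_"));
        Ok(())
    });
    MockZones::boot("oxz_oximeter".to_string()).unwrap();
}
```

The `expect_new_services` helper follows.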
+ fn expect_new_services() -> Vec> { + illumos_utils::USE_MOCKS.store(true, Ordering::SeqCst); + // Create a VNIC + let create_vnic_ctx = MockDladm::create_vnic_context(); + create_vnic_ctx.expect().returning( + |physical_link: &Etherstub, _, _, _, _| { + assert_eq!(&physical_link.0, &UNDERLAY_ETHERSTUB_NAME); + Ok(()) + }, + ); + + // Install the Omicron Zone + let install_ctx = MockZones::install_omicron_zone_context(); + install_ctx.expect().returning(|_, _, name, _, _, _, _, _, _| { + assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); + Ok(()) + }); + + // Boot the zone. + let boot_ctx = MockZones::boot_context(); + boot_ctx.expect().returning(|name| { + assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); + Ok(()) + }); + + // After calling `MockZones::boot`, `RunningZone::boot` will then look + // up the zone ID for the booted zone. This goes through + // `MockZone::id` to find the zone and get its ID. + let id_ctx = MockZones::id_context(); + let id = Arc::new(std::sync::Mutex::new(1)); + id_ctx.expect().returning(move |name| { + assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); + let mut value = id.lock().unwrap(); + let rv = *value; + *value = rv + 1; + Ok(Some(rv)) + }); + + // Ensure the address exists + let ensure_address_ctx = MockZones::ensure_address_context(); + ensure_address_ctx.expect().returning(|_, _, _| { + Ok(ipnetwork::IpNetwork::new(IpAddr::V6(Ipv6Addr::LOCALHOST), 64) + .unwrap()) + }); + + // Wait for the networking service. + let wait_ctx = svc::wait_for_service_context(); + wait_ctx.expect().returning(|_, _, _| Ok(())); + + // Import the manifest, enable the service + let execute_ctx = illumos_utils::execute_helper_context(); + execute_ctx.expect().times(..).returning(|_| { + Ok(std::process::Output { + status: std::process::ExitStatus::from_raw(0), + stdout: vec![], + stderr: vec![], + }) + }); + + vec![ + Box::new(create_vnic_ctx), + Box::new(install_ctx), + Box::new(boot_ctx), + Box::new(id_ctx), + Box::new(ensure_address_ctx), + Box::new(wait_ctx), + Box::new(execute_ctx), + ] + } + // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { + async fn ensure_new_service( + mgr: &ServiceManager, + id: Uuid, + generation: Generation, + ) { let _expectations = expect_new_service(); - - mgr.ensure_all_services_persistent(ServiceEnsureBody { - services: vec![ServiceZoneRequest { + let address = + SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { + generation, + zones: vec![OmicronZoneConfig { id, - zone_type: ZoneType::Oximeter, - addresses: vec![Ipv6Addr::LOCALHOST], - dataset: None, - services: vec![ServiceZoneService { - id, - details: ServiceType::Oximeter { - address: SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - OXIMETER_PORT, - 0, - 0, - ), - }, - }], + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Oximeter { address }, }], }) .await @@ -3183,24 +3754,19 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. 
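An aside before the companion helper: the body assembled here is also what travels over the sled-agent zones API. A simplified serde mirror of the request types (field names taken from this patch; everything else, including the port and crate features, is assumed) shows the wire shape:

```rust
use serde::Serialize;
use std::net::Ipv6Addr;
use uuid::Uuid; // requires the `v4` and `serde` features

#[derive(Serialize)]
struct OmicronZonesConfig {
    generation: u64,
    zones: Vec<OmicronZoneConfig>,
}

#[derive(Serialize)]
struct OmicronZoneConfig {
    id: Uuid,
    underlay_address: Ipv6Addr,
    zone_type: serde_json::Value, // stands in for the real tagged enum
}

fn main() {
    let config = OmicronZonesConfig {
        generation: 2,
        zones: vec![OmicronZoneConfig {
            id: Uuid::new_v4(),
            underlay_address: Ipv6Addr::LOCALHOST,
            // Hypothetical payload; the address and port are illustrative.
            zone_type: serde_json::json!({
                "type": "oximeter",
                "address": "[::1]:12223",
            }),
        }],
    };
    println!("{}", serde_json::to_string_pretty(&config).unwrap());
}
```

`ensure_existing_service`, which exercises the idempotent path, follows.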
- async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { - mgr.ensure_all_services_persistent(ServiceEnsureBody { - services: vec![ServiceZoneRequest { + async fn ensure_existing_service( + mgr: &ServiceManager, + id: Uuid, + generation: Generation, + ) { + let address = + SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { + generation, + zones: vec![OmicronZoneConfig { id, - zone_type: ZoneType::Oximeter, - addresses: vec![Ipv6Addr::LOCALHOST], - dataset: None, - services: vec![ServiceZoneService { - id, - details: ServiceType::Oximeter { - address: SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - OXIMETER_PORT, - 0, - 0, - ), - }, - }], + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Oximeter { address }, }], }) .await @@ -3276,48 +3842,104 @@ mod test { handle } + #[derive(Clone)] + struct LedgerTestHelper<'a> { + log: slog::Logger, + ddmd_client: DdmAdminClient, + storage_handle: StorageHandle, + zone_bundler: ZoneBundler, + test_config: &'a TestConfig, + } + + impl<'a> LedgerTestHelper<'a> { + async fn new( + log: slog::Logger, + test_config: &'a TestConfig, + ) -> LedgerTestHelper { + let ddmd_client = DdmAdminClient::localhost(&log).unwrap(); + let storage_handle = setup_storage().await; + let zone_bundler = ZoneBundler::new( + log.clone(), + storage_handle.clone(), + Default::default(), + ); + + LedgerTestHelper { + log, + ddmd_client, + storage_handle, + zone_bundler, + test_config, + } + } + + fn new_service_manager(self) -> ServiceManager { + let log = &self.log; + let mgr = ServiceManager::new( + log, + self.ddmd_client, + make_bootstrap_networking_config(), + SledMode::Auto, + Some(true), + SidecarRevision::Physical("rev-test".to_string()), + vec![], + self.storage_handle, + self.zone_bundler, + ); + self.test_config.override_paths(&mgr); + mgr + } + + fn sled_agent_started( + log: &slog::Logger, + test_config: &TestConfig, + mgr: &ServiceManager, + ) { + let port_manager = PortManager::new( + log.new(o!("component" => "PortManager")), + Ipv6Addr::new( + 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + ), + ); + + mgr.sled_agent_started( + test_config.make_config(), + port_manager, + Ipv6Addr::LOCALHOST, + Uuid::new_v4(), + None, + ) + .unwrap(); + } + } + #[tokio::test] #[serial_test::serial] async fn test_ensure_service() { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); - let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + let v1 = Generation::new(); + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, v1); + assert!(found.zones.is_empty()); + + let v2 = v1.next(); + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id, v2).await; - let storage_handle = setup_storage().await; - let zone_bundler = ZoneBundler::new( - log.clone(), - storage_handle.clone(), - Default::default(), - ); - let mgr = ServiceManager::new( - &log, - DdmAdminClient::localhost(&log).unwrap(), - make_bootstrap_networking_config(), - SledMode::Auto, - Some(true), - SidecarRevision::Physical("rev-test".to_string()), - vec![], - storage_handle, - zone_bundler, - ); - test_config.override_paths(&mgr); + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + 
assert_eq!(found.generation, v2); + assert_eq!(found.zones.len(), 1); + assert_eq!(found.zones[0].id, id); - let port_manager = PortManager::new( - logctx.log.new(o!("component" => "PortManager")), - Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), - ); - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - ) - .unwrap(); - - let id = Uuid::new_v4(); - ensure_new_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -3329,44 +3951,23 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_ensure_service_which_already_exists", ); - let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); - let storage_handle = setup_storage().await; - let zone_bundler = ZoneBundler::new( - log.clone(), - storage_handle.clone(), - Default::default(), - ); - let mgr = ServiceManager::new( - &log, - DdmAdminClient::localhost(&log).unwrap(), - make_bootstrap_networking_config(), - SledMode::Auto, - Some(true), - SidecarRevision::Physical("rev-test".to_string()), - vec![], - storage_handle, - zone_bundler, - ); - test_config.override_paths(&mgr); - - let port_manager = PortManager::new( - logctx.log.new(o!("component" => "PortManager")), - Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), - ); - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - ) - .unwrap(); - + let v2 = Generation::new().next(); let id = Uuid::new_v4(); - ensure_new_service(&mgr, id).await; - ensure_existing_service(&mgr, id).await; + ensure_new_service(&mgr, id, v2).await; + let v3 = v2.next(); + ensure_existing_service(&mgr, id, v3).await; + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, v3); + assert_eq!(found.zones.len(), 1); + assert_eq!(found.zones[0].id, id); + drop_service_manager(mgr); logctx.cleanup_successful(); @@ -3378,77 +3979,31 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_are_recreated_on_reboot", ); - let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let ddmd_client = DdmAdminClient::localhost(&log).unwrap(); - let bootstrap_networking = make_bootstrap_networking_config(); - - // First, spin up a ServiceManager, create a new service, and tear it - // down. - let storage_handle = setup_storage().await; - let zone_bundler = ZoneBundler::new( - log.clone(), - storage_handle.clone(), - Default::default(), - ); - let mgr = ServiceManager::new( - &log, - ddmd_client.clone(), - bootstrap_networking.clone(), - SledMode::Auto, - Some(true), - SidecarRevision::Physical("rev-test".to_string()), - vec![], - storage_handle.clone(), - zone_bundler.clone(), - ); - test_config.override_paths(&mgr); + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let port_manager = PortManager::new( - log.new(o!("component" => "PortManager")), - Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), - ); - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - ) - .unwrap(); + // First, spin up a ServiceManager, create a new zone, and then tear + // down the ServiceManager. 
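This reboot test hinges on the ledger round-trip: the zone configuration is serialized to a file on durable storage, so a freshly constructed ServiceManager can restore the same state. The mechanism in miniature, with a hypothetical pared-down config struct and a temp file:

```rust
use serde::{Deserialize, Serialize};

// Hypothetical stand-in for the ledgered zone config.
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct LedgeredConfig {
    generation: u64,
    zone_ids: Vec<String>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let path = std::env::temp_dir().join("omicron-zones-demo.json");

    // "First boot": persist the config to the ledger file.
    let config =
        LedgeredConfig { generation: 2, zone_ids: vec!["oximeter".into()] };
    std::fs::write(&path, serde_json::to_string(&config)?)?;

    // "Reboot": a fresh manager reads the ledger and restores the state.
    let restored: LedgeredConfig =
        serde_json::from_str(&std::fs::read_to_string(&path)?)?;
    assert_eq!(config, restored);

    std::fs::remove_file(&path)?;
    Ok(())
}
```

The test body follows.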
+ let mgr = helper.clone().new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let v2 = Generation::new().next(); let id = Uuid::new_v4(); - ensure_new_service(&mgr, id).await; + ensure_new_service(&mgr, id, v2).await; drop_service_manager(mgr); // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. let _expectations = expect_new_service(); - let mgr = ServiceManager::new( - &log, - ddmd_client, - bootstrap_networking, - SledMode::Auto, - Some(true), - SidecarRevision::Physical("rev-test".to_string()), - vec![], - storage_handle.clone(), - zone_bundler.clone(), - ); - test_config.override_paths(&mgr); + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); - let port_manager = PortManager::new( - log.new(o!("component" => "PortManager")), - Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), - ); - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - ) - .unwrap(); + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, v2); + assert_eq!(found.zones.len(), 1); + assert_eq!(found.zones[0].id, id); drop_service_manager(mgr); @@ -3461,85 +4016,325 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_do_not_persist_without_config", ); - let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let ddmd_client = DdmAdminClient::localhost(&log).unwrap(); - let bootstrap_networking = make_bootstrap_networking_config(); - - // First, spin up a ServiceManager, create a new service, and tear it - // down. - let storage_handle = setup_storage().await; - let zone_bundler = ZoneBundler::new( - log.clone(), - storage_handle.clone(), - Default::default(), - ); - let mgr = ServiceManager::new( - &log, - ddmd_client.clone(), - bootstrap_networking.clone(), - SledMode::Auto, - Some(true), - SidecarRevision::Physical("rev-test".to_string()), - vec![], - storage_handle.clone(), - zone_bundler.clone(), - ); - test_config.override_paths(&mgr); + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let port_manager = PortManager::new( - log.new(o!("component" => "PortManager")), - Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), - ); - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - ) - .unwrap(); + // First, spin up a ServiceManager, create a new zone, and then tear + // down the ServiceManager. + let mgr = helper.clone().new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let v1 = Generation::new(); + let v2 = v1.next(); let id = Uuid::new_v4(); - ensure_new_service(&mgr, id).await; + ensure_new_service(&mgr, id, v2).await; drop_service_manager(mgr); - // Next, delete the ledger. This means the service we just created will - // not be remembered on the next initialization. + // Next, delete the ledger. This means the zone we just created will not + // be remembered on the next initialization. std::fs::remove_file( - test_config.config_dir.path().join(SERVICES_LEDGER_FILENAME), + test_config.config_dir.path().join(ZONES_LEDGER_FILENAME), ) .unwrap(); // Observe that the old service is not re-initialized. 
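The flip side of the previous test: with the ledger file gone, the manager has nothing to restore and falls back to the initial state (generation 1, no zones). A sketch of that fallback, assuming a simplified loader:

```rust
use std::path::Path;

// Assumed simplification of the real loading logic: a missing or
// unreadable ledger yields the initial generation.
fn load_generation(ledger: &Path) -> u64 {
    std::fs::read_to_string(ledger)
        .ok()
        .and_then(|s| serde_json::from_str::<serde_json::Value>(&s).ok())
        .and_then(|v| v.get("omicron_generation")?.as_u64())
        .unwrap_or(1)
}

fn main() {
    assert_eq!(load_generation(Path::new("/nonexistent/zones.json")), 1);
}
```

The assertion that nothing was re-initialized follows.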
- let mgr = ServiceManager::new( - &log, - ddmd_client, - bootstrap_networking, - SledMode::Auto, - Some(true), - SidecarRevision::Physical("rev-test".to_string()), - vec![], - storage_handle, - zone_bundler.clone(), - ); - test_config.override_paths(&mgr); + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, v1); + assert!(found.zones.is_empty()); + + drop_service_manager(mgr); + + logctx.cleanup_successful(); + } + + #[tokio::test] + #[serial_test::serial] + async fn test_bad_generations() { + // Start like the normal tests. + let logctx = + omicron_test_utils::dev::test_setup_log("test_bad_generations"); + let test_config = TestConfig::new().await; + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + // Like the normal tests, set up a generation with one zone in it. + let v1 = Generation::new(); + let v2 = v1.next(); + let id1 = Uuid::new_v4(); + + let _expectations = expect_new_services(); + let address = + SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + let mut zones = vec![OmicronZoneConfig { + id: id1, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Oximeter { address }, + }]; + mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { + generation: v2, + zones: zones.clone(), + }) + .await + .unwrap(); + + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, v2); + assert_eq!(found.zones.len(), 1); + assert_eq!(found.zones[0].id, id1); + + // Make a new list of zones that we're going to try with a bunch of + // different generation numbers. + let id2 = Uuid::new_v4(); + zones.push(OmicronZoneConfig { + id: id2, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Oximeter { address }, + }); + + // Now try to apply that list with an older generation number. This + // shouldn't work and the reported state should be unchanged. + let error = mgr + .ensure_all_omicron_zones_persistent(OmicronZonesConfig { + generation: v1, + zones: zones.clone(), + }) + .await + .expect_err("unexpectedly went backwards in zones generation"); + assert!(matches!( + error, + Error::RequestedConfigOutdated { requested, current } + if requested == v1 && current == v2 + )); + let found2 = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found, found2); + + // Now try to apply that list with the same generation number that we + // used before. This shouldn't work either. + let error = mgr + .ensure_all_omicron_zones_persistent(OmicronZonesConfig { + generation: v2, + zones: zones.clone(), + }) + .await + .expect_err("unexpectedly changed a single zone generation"); + assert!(matches!( + error, + Error::RequestedConfigConflicts(vr) if vr == v2 + )); + let found3 = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found, found3); + + // But we should be able to apply this new list of zones as long as we + // advance the generation number. 
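The rules this test pins down can be stated compactly: a request older than the current generation is rejected as outdated; a request at the current generation must carry the identical zone set; only a newer generation may change the zones. A sketch with plain integers standing in for `Generation` and IDs for zones:

```rust
#[derive(Debug, PartialEq)]
enum EnsureError {
    Outdated { requested: u64, current: u64 },
    Conflict(u64),
}

fn check(
    current: u64,
    current_zones: &[u32],
    requested: u64,
    zones: &[u32],
) -> Result<(), EnsureError> {
    if requested < current {
        return Err(EnsureError::Outdated { requested, current });
    }
    if requested == current && zones != current_zones {
        return Err(EnsureError::Conflict(requested));
    }
    Ok(())
}

fn main() {
    // Older generation: rejected as outdated.
    assert!(matches!(
        check(2, &[1], 1, &[1, 2]),
        Err(EnsureError::Outdated { .. })
    ));
    // Same generation, different zones: rejected as a conflict.
    assert_eq!(check(2, &[1], 2, &[1, 2]), Err(EnsureError::Conflict(2)));
    // Newer generation: accepted.
    assert!(check(2, &[1], 3, &[1, 2]).is_ok());
}
```

The test resumes below, advancing to v3 for the success case.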
+ let v3 = v2.next(); + mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { + generation: v3, + zones: zones.clone(), + }) + .await + .expect("failed to remove all zones in a new generation"); + let found4 = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found4.generation, v3); + let mut our_zones = zones; + our_zones.sort_by(|a, b| a.id.cmp(&b.id)); + let mut found_zones = found4.zones; + found_zones.sort_by(|a, b| a.id.cmp(&b.id)); + assert_eq!(our_zones, found_zones); + + drop_service_manager(mgr); - let port_manager = PortManager::new( - log.new(o!("component" => "PortManager")), - Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), + logctx.cleanup_successful(); + } + + #[tokio::test] + #[serial_test::serial] + async fn test_old_ledger_migration() { + let logctx = omicron_test_utils::dev::test_setup_log( + "test_old_ledger_migration", ); - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, + let test_config = TestConfig::new().await; + + // Before we start the service manager, stuff one of our old-format + // service ledgers into place. + let contents = + include_str!("../tests/old-service-ledgers/rack2-sled10.json"); + std::fs::write( + test_config.config_dir.path().join(SERVICES_LEDGER_FILENAME), + contents, + ) + .expect("failed to copy example old-format services ledger into place"); + + // Now start the service manager. + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + let mgr = helper.clone().new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + // Trigger the migration code. (Yes, it's hokey that we create this + // fake argument.) + let unused = Mutex::new(BTreeMap::new()); + let migrated_ledger = mgr + .load_ledgered_zones(&unused.lock().await) + .await + .expect("failed to load ledgered zones") + .unwrap(); + + // As a quick check, the migrated ledger should have some zones. + let migrated_config = migrated_ledger.data(); + assert!(!migrated_config.zones.is_empty()); + + // The ServiceManager should now report the migrated zones, meaning that + // they've been copied into the new-format ledger. + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found, migrated_config.clone().to_omicron_zones_config()); + // They should both match the expected converted output. + let expected: OmicronZonesConfigLocal = serde_json::from_str( + include_str!("../tests/output/new-zones-ledgers/rack2-sled10.json"), ) .unwrap(); + let expected_config = expected.to_omicron_zones_config(); + assert_eq!(found, expected_config); + + // Just to be sure, shut down the manager and create a new one without + // triggering migration again. It should also report the same zones. + drop_service_manager(mgr); + + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found, expected_config); + + drop_service_manager(mgr); + logctx.cleanup_successful(); + } + + #[tokio::test] + #[serial_test::serial] + async fn test_old_ledger_migration_continue() { + // This test is just like "test_old_ledger_migration", except that we + // deploy a new zone after migration and before shutting down the + // service manager. This tests that new changes modify the new, + // migrated config. 
+ let logctx = omicron_test_utils::dev::test_setup_log( + "test_old_ledger_migration_continue", + ); + let test_config = TestConfig::new().await; + + // Before we start the service manager, stuff one of our old-format + // service ledgers into place. + let contents = + include_str!("../tests/old-service-ledgers/rack2-sled10.json"); + std::fs::write( + test_config.config_dir.path().join(SERVICES_LEDGER_FILENAME), + contents, + ) + .expect("failed to copy example old-format services ledger into place"); + + // Now start the service manager. + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + let mgr = helper.clone().new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + // Trigger the migration code. + let unused = Mutex::new(BTreeMap::new()); + let migrated_ledger = mgr + .load_ledgered_zones(&unused.lock().await) + .await + .expect("failed to load ledgered zones") + .unwrap(); + + // The other test verified that migration has happened normally so let's + // assume it has. Now provision a new zone. + let vv = migrated_ledger.data().omicron_generation.next(); + let id = Uuid::new_v4(); + let _expectations = expect_new_services(); + let address = + SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + let mut zones = + migrated_ledger.data().clone().to_omicron_zones_config().zones; + zones.push(OmicronZoneConfig { + id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Oximeter { address }, + }); + mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { + generation: vv, + zones, + }) + .await + .expect("failed to add new zone after migration"); + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, vv); + assert_eq!(found.zones.len(), migrated_ledger.data().zones.len() + 1); + + // Just to be sure, shut down the manager and create a new one without + // triggering migration again. It should now report one more zone than + // was migrated earlier. drop_service_manager(mgr); + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, vv); + assert_eq!(found.zones.len(), migrated_ledger.data().zones.len() + 1); + + drop_service_manager(mgr); + logctx.cleanup_successful(); + } + + #[tokio::test] + #[serial_test::serial] + async fn test_old_ledger_migration_bad() { + let logctx = omicron_test_utils::dev::test_setup_log( + "test_old_ledger_migration_bad", + ); + let test_config = TestConfig::new().await; + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + + // Before we start things, stuff a broken ledger into place. For this + // to test what we want, it needs to be a valid ledger that we simply + // failed to convert. + std::fs::write( + test_config.config_dir.path().join(SERVICES_LEDGER_FILENAME), + "{", + ) + .expect("failed to copy example old-format services ledger into place"); + + // Start the service manager. + let mgr = helper.new_service_manager(); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + // Trigger the migration code. 
+ let unused = Mutex::new(BTreeMap::new());
+ let error = mgr
+ .load_ledgered_zones(&unused.lock().await)
+ .await
+ .expect_err("succeeded in loading bogus ledgered zones");
+ assert_eq!(
+ "Error migrating old-format services ledger: failed to read or \
+ parse old-format ledger, but one exists",
+ format!("{:#}", error)
+ );
+
+ logctx.cleanup_successful();
+ }
@@ -3557,19 +4352,19 @@ mod test {
 }

 #[test]
- fn test_all_zone_requests_schema() {
- let schema = schemars::schema_for!(AllZoneRequests);
+ fn test_zone_bundle_metadata_schema() {
+ let schema = schemars::schema_for!(ZoneBundleMetadata);
 expectorate::assert_contents(
- "../schema/all-zone-requests.json",
+ "../schema/zone-bundle-metadata.json",
 &serde_json::to_string_pretty(&schema).unwrap(),
 );
 }

 #[test]
- fn test_zone_bundle_metadata_schema() {
- let schema = schemars::schema_for!(ZoneBundleMetadata);
+ fn test_all_zones_requests_schema() {
+ let schema = schemars::schema_for!(OmicronZonesConfigLocal);
 expectorate::assert_contents(
- "../schema/zone-bundle-metadata.json",
+ "../schema/all-zones-requests.json",
 &serde_json::to_string_pretty(&schema).unwrap(),
 );
 }
diff --git a/sled-agent/src/services_migration.rs b/sled-agent/src/services_migration.rs
new file mode 100644
index 0000000000..bedd4759c8
--- /dev/null
+++ b/sled-agent/src/services_migration.rs
@@ -0,0 +1,624 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Sled Agents are responsible for running zones that make up much of the
+//! control plane (Omicron). Configuration for these zones is owned by the
+//! control plane, but that configuration must be persisted locally in order to
+//! support cold boot of the control plane. (The control plane can't very well
+//! tell sled agents what to run if it's not online yet!)
+//!
+//! Historically, these configurations were represented as an
+//! `AllZoneRequests`, which contains a bunch of `ZoneRequest`s, each
+//! containing a `ServiceZoneRequest`. This last structure was quite general
+//! and made it possible to express a world of configurations that are not
+//! actually valid. To avoid spreading extra complexity, these structures were
+//! replaced with `OmicronZonesConfigLocal` and `OmicronZonesConfig`,
+//! respectively. Upgrading production systems across this change requires
+//! migrating any locally-stored configuration in the old format into the new
+//! one.
+//!
+//! This file defines these old-format types and functions to convert them to
+//! the new types, solely to perform that migration. We can remove all this
+//! when we're satisfied that all deployed systems that we care about have moved
+//! past this change.
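The conversion machinery that follows is built on `TryFrom`: a permissive legacy type is checked and narrowed into the stricter new type, failing with context rather than guessing. The pattern in miniature, with illustrative types only:

```rust
// Illustrative legacy/new types; not the real definitions.
#[derive(Debug)]
struct LegacyZone {
    id: u32,
    addresses: Vec<String>,
}

#[derive(Debug)]
struct NewZone {
    id: u32,
    underlay_address: String,
}

impl TryFrom<LegacyZone> for NewZone {
    type Error = String;

    fn try_from(old: LegacyZone) -> Result<Self, Self::Error> {
        // The legacy schema permitted any number of addresses, but real
        // configs always had exactly one; reject anything else.
        let [underlay_address] = <[String; 1]>::try_from(old.addresses)
            .map_err(|v| {
                format!("zone {}: expected 1 address, found {}", old.id, v.len())
            })?;
        Ok(NewZone { id: old.id, underlay_address })
    }
}

fn main() {
    let ok = LegacyZone { id: 7, addresses: vec!["fd00::1".into()] };
    assert!(NewZone::try_from(ok).is_ok());

    let bad = LegacyZone { id: 8, addresses: vec![] };
    assert!(NewZone::try_from(bad).is_err());
}
```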
+ +use crate::params::{ + OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, ZoneType, + OMICRON_ZONES_CONFIG_INITIAL_GENERATION, +}; +use crate::services::{OmicronZoneConfigLocal, OmicronZonesConfigLocal}; +use anyhow::{anyhow, ensure, Context}; +use camino::Utf8PathBuf; +use omicron_common::api::external::Generation; +use omicron_common::api::internal::shared::{ + NetworkInterface, SourceNatConfig, +}; +use omicron_common::ledger::Ledgerable; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_storage::dataset::{DatasetKind, DatasetName}; +use std::fmt::Debug; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; +use uuid::Uuid; + +/// The filename of the ledger containing this old-format configuration. +pub const SERVICES_LEDGER_FILENAME: &str = "services.json"; + +/// A wrapper around `ZoneRequest` that allows it to be serialized to a JSON +/// file. +#[derive(Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] +pub struct AllZoneRequests { + /// ledger generation (not an Omicron-provided generation) + generation: Generation, + requests: Vec, +} + +impl Default for AllZoneRequests { + fn default() -> Self { + Self { generation: Generation::new(), requests: vec![] } + } +} + +impl Ledgerable for AllZoneRequests { + fn is_newer_than(&self, other: &AllZoneRequests) -> bool { + self.generation >= other.generation + } + + fn generation_bump(&mut self) { + self.generation = self.generation.next(); + } +} + +impl TryFrom for OmicronZonesConfigLocal { + type Error = anyhow::Error; + + fn try_from(input: AllZoneRequests) -> Result { + // The Omicron generation number that we choose here (2) deserves some + // explanation. + // + // This is supposed to be the control-plane-issued generation number for + // this configuration. But any configuration that we're converting here + // predates the point where the control plane issued generation numbers + // at all. So what should we assign it? Well, what are the + // constraints? + // + // - It must be newer than generation 1 because generation 1 canonically + // represents the initial state of having no zones deployed. If we + // used generation 1 here, any code could ignore this configuration on + // the grounds that it's no newer than what it already has. (The + // contents of a given generation are supposed to be immutable.) + // + // - It should be older than anything else that the control plane might + // try to send us so that if the control plane wants to change + // anything, we won't ignore its request because we think this + // configuration is newer. But really this has to be the control + // plane's responsibility, not ours. That is: Nexus needs to ask us + // what our generation number is and subsequent configurations should + // use newer generation numbers. It's not a great plan for it to + // assume anything about the generation numbers deployed on sleds + // whose configurations it's never seen. (In practice, newly deployed + // systems currently wind up with generation 5, so it _could_ choose + // something like 6 to start with -- or some larger number to leave + // some buffer.) + // + // In summary, 2 seems fine. + let omicron_generation = + Generation::from(OMICRON_ZONES_CONFIG_INITIAL_GENERATION).next(); + + // The ledger generation doesn't really matter. In case it's useful, we + // pick the generation from the ledger that we loaded. 
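The constraint argued above is small but load-bearing; in miniature, with plain integers standing in for `Generation` (the constant's value of 1 is an assumption consistent with the arithmetic above):

```rust
// Assumed value: generation 1 canonically means "no zones deployed".
const OMICRON_ZONES_CONFIG_INITIAL_GENERATION: u64 = 1;

/// A config is honored only if it is newer than what the sled has.
fn is_newer(incoming: u64, current: u64) -> bool {
    incoming > current
}

fn main() {
    let migrated = OMICRON_ZONES_CONFIG_INITIAL_GENERATION + 1;
    assert_eq!(migrated, 2);
    // Newer than the canonical "empty" generation...
    assert!(is_newer(migrated, OMICRON_ZONES_CONFIG_INITIAL_GENERATION));
    // ...and superseded by anything Nexus issues afterwards.
    assert!(is_newer(migrated + 1, migrated));
}
```

The conversion resumes below by carrying over the ledger generation.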
+ let ledger_generation = input.generation; + + let ndatasets_input = + input.requests.iter().filter(|r| r.zone.dataset.is_some()).count(); + + let zones = input + .requests + .into_iter() + .map(OmicronZoneConfigLocal::try_from) + .collect::, _>>() + .context( + "mapping `AllZoneRequests` to `OmicronZonesConfigLocal`", + )?; + + // As a quick check, the number of datasets in the old and new + // generations ought to be the same. + let ndatasets_output = + zones.iter().filter(|r| r.zone.dataset_name().is_some()).count(); + ensure!( + ndatasets_input == ndatasets_output, + "conversion produced a different number of datasets" + ); + + Ok(OmicronZonesConfigLocal { + omicron_generation, + ledger_generation, + zones, + }) + } +} + +/// This struct represents the combo of "what zone did you ask for" + "where did +/// we put it". +#[derive(Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] +struct ZoneRequest { + zone: ServiceZoneRequest, + #[schemars(with = "String")] + root: Utf8PathBuf, +} + +impl TryFrom for OmicronZoneConfigLocal { + type Error = anyhow::Error; + + fn try_from(input: ZoneRequest) -> Result { + Ok(OmicronZoneConfigLocal { + zone: OmicronZoneConfig::try_from(input.zone)?, + root: input.root, + }) + } +} + +/// Describes a request to create a zone running one or more services. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +struct ServiceZoneRequest { + // The UUID of the zone to be initialized. + id: Uuid, + // The type of the zone to be created. + zone_type: ZoneType, + // The addresses on which the service should listen for requests. + addresses: Vec, + // Datasets which should be managed by this service. + #[serde(default)] + dataset: Option, + // Services that should be run in the zone + services: Vec, +} + +impl TryFrom for OmicronZoneConfig { + type Error = anyhow::Error; + + fn try_from(input: ServiceZoneRequest) -> Result { + let error_context = || { + format!( + "zone {} (type {:?})", + input.id, + input.zone_type.to_string() + ) + }; + + // Historically, this type was used to describe two distinct kinds of + // thing: + // + // 1. an "Omicron" zone: Clickhouse, CockroachDb, Nexus, etc. We call + // these Omicron zones because they're managed by the control plane + // (Omicron). Nexus knows about these, stores information in + // CockroachDB about them, and is responsible for using Sled Agent + // APIs to configure these zones. + // + // 2. a "sled-local" zone. The only such zone is the "switch" zone. + // This is not really known to Nexus nor exposed outside Sled Agent. + // It's configured either based on Sled Agent's config file or else + // autodetection of whether this system _is_ a Scrimlet. + // + // All of the types in this file describe the ledgered configuration of + // the Omicron zones. We don't care about the switch zone here. Even + // for Omicron zones, the `ServiceZoneRequest` type is much more general + // than was strictly necessary to represent the kinds of zones we + // defined in practice. The more constrained schema is described by + // `OmicronZoneConfig`. This function verifies that the structures we + // find conform to that more constrained schema. + // + // Many of these properties were determined by code inspection. They + // could be wrong! But we've tried hard to make sure we're not wrong. 
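Each of the checks that follow attaches zone context through anyhow, so a failed migration names the offending zone. The error-chaining style in miniature (function and message are illustrative):

```rust
use anyhow::{ensure, Context, Result};

fn validate_service_count(zone_id: u32, n_services: usize) -> Result<()> {
    (|| -> Result<()> {
        ensure!(
            n_services == 1,
            "expected exactly one service, found {}",
            n_services
        );
        Ok(())
    })()
    .with_context(|| format!("zone {zone_id}"))
}

fn main() {
    let err = validate_service_count(7, 2).unwrap_err();
    // Alternate formatting flattens the context chain.
    assert_eq!(
        format!("{err:#}"),
        "zone 7: expected exactly one service, found 2"
    );
}
```

The zone-type screening follows.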
+ + match input.zone_type { + ZoneType::Clickhouse + | ZoneType::ClickhouseKeeper + | ZoneType::CockroachDb + | ZoneType::CruciblePantry + | ZoneType::Crucible + | ZoneType::ExternalDns + | ZoneType::InternalDns + | ZoneType::Nexus + | ZoneType::Ntp + | ZoneType::Oximeter => (), + ZoneType::Switch => { + return Err(anyhow!("unsupported zone type")) + .with_context(error_context) + } + } + + let id = input.id; + + // In production systems, Omicron zones only ever had exactly one + // address here. Multiple addresses were used for the "switch" zone, + // which cannot appear here. + if input.addresses.len() != 1 { + return Err(anyhow!( + "expected exactly one address, found {}", + input.addresses.len() + )) + .with_context(error_context); + } + + let underlay_address = input.addresses[0]; + + // In production systems, Omicron zones only ever had exactly one + // "service" inside them. (Multiple services were only supported for + // the "switch" zone and for Omicron zones in pre-release versions of + // Omicron, neither of which we expect to see here.) + if input.services.len() != 1 { + return Err(anyhow!( + "expected exactly one service, found {}", + input.services.len(), + )) + .with_context(error_context); + } + + let service = input.services.into_iter().next().unwrap(); + + // The id for the one service we found must match the overall request + // id. + if service.id != input.id { + return Err(anyhow!( + "expected service id ({}) to match id ({})", + service.id, + input.id, + )) + .with_context(error_context); + } + + // If there's a dataset, its id must match the overall request id. + let dataset_request = input + .dataset + .ok_or_else(|| anyhow!("missing dataset")) + .with_context(error_context); + let has_dataset = dataset_request.is_ok(); + if let Ok(dataset) = &dataset_request { + if dataset.id != input.id { + return Err(anyhow!( + "expected dataset id ({}) to match id ({})", + dataset.id, + input.id, + )) + .with_context(error_context); + } + } + + let zone_type = match service.details { + ServiceType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + } => OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + }, + ServiceType::ExternalDns { http_address, dns_address, nic } => { + OmicronZoneType::ExternalDns { + dataset: dataset_request?.to_omicron_zone_dataset( + DatasetKind::ExternalDns, + http_address, + )?, + http_address, + dns_address, + nic, + } + } + ServiceType::InternalDns { + http_address, + dns_address, + gz_address, + gz_address_index, + } => OmicronZoneType::InternalDns { + dataset: dataset_request?.to_omicron_zone_dataset( + DatasetKind::InternalDns, + http_address, + )?, + http_address, + dns_address, + gz_address, + gz_address_index, + }, + ServiceType::Oximeter { address } => { + OmicronZoneType::Oximeter { address } + } + ServiceType::CruciblePantry { address } => { + OmicronZoneType::CruciblePantry { address } + } + ServiceType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + } => OmicronZoneType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + }, + ServiceType::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + } => OmicronZoneType::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + }, + ServiceType::Clickhouse { address } => { + OmicronZoneType::Clickhouse { + address, + dataset: dataset_request?.to_omicron_zone_dataset( + DatasetKind::Clickhouse, + address, + 
)?, + } + } + ServiceType::ClickhouseKeeper { address } => { + OmicronZoneType::ClickhouseKeeper { + address, + dataset: dataset_request?.to_omicron_zone_dataset( + DatasetKind::ClickhouseKeeper, + address, + )?, + } + } + ServiceType::CockroachDb { address } => { + OmicronZoneType::CockroachDb { + address, + dataset: dataset_request?.to_omicron_zone_dataset( + DatasetKind::CockroachDb, + address, + )?, + } + } + ServiceType::Crucible { address } => OmicronZoneType::Crucible { + address, + dataset: dataset_request? + .to_omicron_zone_dataset(DatasetKind::Crucible, address)?, + }, + }; + + if zone_type.dataset_name().is_none() && has_dataset { + // This indicates that the legacy form specified a dataset for a + // zone type that we do not (today) believe should have one. This + // should be impossible. If it happens, we need to re-evaluate our + // assumptions in designing `OmicronZoneType`. + return Err(anyhow!("found dataset that went unused")) + .with_context(error_context); + } + + Ok(OmicronZoneConfig { id, underlay_address, zone_type }) + } +} + +/// Used to request that the Sled initialize a single service. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +struct ServiceZoneService { + id: Uuid, + details: ServiceType, +} + +/// Describes service-specific parameters. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case")] +enum ServiceType { + Nexus { + /// The address at which the internal nexus server is reachable. + internal_address: SocketAddrV6, + /// The address at which the external nexus server is reachable. + external_ip: IpAddr, + /// The service vNIC providing external connectivity using OPTE. + nic: NetworkInterface, + /// Whether Nexus's external endpoint should use TLS + external_tls: bool, + /// External DNS servers Nexus can use to resolve external hosts. + external_dns_servers: Vec, + }, + ExternalDns { + /// The address at which the external DNS server API is reachable. + http_address: SocketAddrV6, + /// The address at which the external DNS server is reachable. + dns_address: SocketAddr, + /// The service vNIC providing external connectivity using OPTE. + nic: NetworkInterface, + }, + InternalDns { + http_address: SocketAddrV6, + dns_address: SocketAddrV6, + /// The addresses in the global zone which should be created + /// + /// For the DNS service, which exists outside the sleds's typical subnet + /// - adding an address in the GZ is necessary to allow inter-zone + /// traffic routing. + gz_address: Ipv6Addr, + + /// The address is also identified with an auxiliary bit of information + /// to ensure that the created global zone address can have a unique + /// name. + gz_address_index: u32, + }, + Oximeter { + address: SocketAddrV6, + }, + CruciblePantry { + address: SocketAddrV6, + }, + BoundaryNtp { + address: SocketAddrV6, + ntp_servers: Vec, + dns_servers: Vec, + domain: Option, + /// The service vNIC providing outbound connectivity using OPTE. + nic: NetworkInterface, + /// The SNAT configuration for outbound connections. 
+ snat_cfg: SourceNatConfig, + }, + InternalNtp { + address: SocketAddrV6, + ntp_servers: Vec, + dns_servers: Vec, + domain: Option, + }, + Clickhouse { + address: SocketAddrV6, + }, + ClickhouseKeeper { + address: SocketAddrV6, + }, + CockroachDb { + address: SocketAddrV6, + }, + Crucible { + address: SocketAddrV6, + }, +} + +/// Describes a request to provision a specific dataset +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +struct DatasetRequest { + id: Uuid, + name: DatasetName, + service_address: SocketAddrV6, +} + +impl DatasetRequest { + fn to_omicron_zone_dataset( + self, + kind: DatasetKind, + service_address: SocketAddrV6, + ) -> Result { + ensure!( + kind == *self.name.dataset(), + "expected dataset kind {:?}, found {:?}", + kind, + self.name.dataset(), + ); + + ensure!( + self.service_address == service_address, + "expected dataset kind {:?} service address to be {}, found {}", + kind, + service_address, + self.service_address, + ); + + Ok(OmicronZoneDataset { pool_name: self.name.pool().clone() }) + } +} + +#[cfg(test)] +mod test { + use super::AllZoneRequests; + use crate::services::OmicronZonesConfigLocal; + use camino::Utf8PathBuf; + + /// Verifies that our understanding of this old-format ledger has not + /// changed. (If you need to change this for some reason, you must figure + /// out how that affects systems with old-format ledgers and update this + /// test accordingly.) + #[test] + fn test_all_services_requests_schema() { + let schema = schemars::schema_for!(AllZoneRequests); + expectorate::assert_contents( + "../schema/all-zone-requests.json", + &serde_json::to_string_pretty(&schema).unwrap(), + ); + } + + /// Verifies that we can successfully convert a corpus of known old-format + /// ledgers. These came from two racks operated by Oxide. In practice + /// there probably aren't many different configurations represented here but + /// it's easy enough to just check them all. + /// + /// In terms of verifying the output: all we have done by hand in + /// constructing this test is verify that the code successfully converts + /// them. The conversion code does some basic sanity checks as well, like + /// that we produced the same number of zones and datasets. 
+ #[test] + fn test_convert_known_ledgers() { + let known_ledgers = &[ + /* rack2 */ + "rack2-sled8.json", + "rack2-sled9.json", + "rack2-sled10.json", + "rack2-sled11.json", + "rack2-sled12.json", + "rack2-sled14.json", + "rack2-sled16.json", + "rack2-sled17.json", + "rack2-sled21.json", + "rack2-sled23.json", + "rack2-sled25.json", + /* rack3 (no sled 10) */ + "rack3-sled0.json", + "rack3-sled1.json", + "rack3-sled2.json", + "rack3-sled3.json", + "rack3-sled4.json", + "rack3-sled5.json", + "rack3-sled6.json", + "rack3-sled7.json", + "rack3-sled8.json", + "rack3-sled9.json", + "rack3-sled11.json", + "rack3-sled12.json", + "rack3-sled13.json", + "rack3-sled14.json", + "rack3-sled15.json", + "rack3-sled16.json", + "rack3-sled17.json", + "rack3-sled18.json", + "rack3-sled19.json", + "rack3-sled20.json", + "rack3-sled21.json", + "rack3-sled22.json", + "rack3-sled23.json", + "rack3-sled24.json", + "rack3-sled25.json", + "rack3-sled26.json", + "rack3-sled27.json", + "rack3-sled28.json", + "rack3-sled29.json", + "rack3-sled30.json", + "rack3-sled31.json", + ]; + + let path = Utf8PathBuf::from("tests/old-service-ledgers"); + let out_path = Utf8PathBuf::from("tests/output/new-zones-ledgers"); + for ledger_basename in known_ledgers { + println!("checking {:?}", ledger_basename); + let contents = std::fs::read_to_string(path.join(ledger_basename)) + .expect("failed to read file"); + let parsed: AllZoneRequests = + serde_json::from_str(&contents).expect("failed to parse file"); + let converted = OmicronZonesConfigLocal::try_from(parsed) + .expect("failed to convert file"); + expectorate::assert_contents( + out_path.join(ledger_basename), + &serde_json::to_string_pretty(&converted).unwrap(), + ); + } + } +} diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 9f8d31b3c5..90e9706198 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -17,7 +17,7 @@ use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, ServiceEnsureBody, SledRole, TimeSync, + InstanceUnregisterResponse, OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; @@ -555,12 +555,11 @@ impl SledAgent { Ok(sled_agent) } - /// Load services for which we're responsible; only meaningful to call - /// during a cold boot. + /// Load services for which we're responsible. /// /// Blocks until all services have started, retrying indefinitely on /// failure. - pub(crate) async fn cold_boot_load_services(&self) { + pub(crate) async fn load_services(&self) { info!(self.log, "Loading cold boot services"); retry_notify( retry_policy_internal_service_aggressive(), @@ -803,36 +802,40 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } - /// Ensures that particular services should be initialized. - /// - /// These services will be instantiated by this function, will be recorded - /// to a local file to ensure they start automatically on next boot. 
- pub async fn services_ensure( + /// List the Omicron zone configuration that's currently running + pub async fn omicron_zones_list( &self, - requested_services: ServiceEnsureBody, - ) -> Result<(), Error> { - let datasets: Vec<_> = requested_services - .services - .iter() - .filter_map(|service| service.dataset.clone()) - .collect(); + ) -> Result { + Ok(self.inner.services.omicron_zones_list().await?) + } + /// Ensures that the specific set of Omicron zones are running as configured + /// (and that no other zones are running) + pub async fn omicron_zones_ensure( + &self, + requested_zones: OmicronZonesConfig, + ) -> Result<(), Error> { // TODO: // - If these are the set of filesystems, we should also consider // removing the ones which are not listed here. // - It's probably worth sending a bulk request to the storage system, // rather than requesting individual datasets. - for dataset in &datasets { + for zone in &requested_zones.zones { + let Some(dataset_name) = zone.dataset_name() else { + continue; + }; + // First, ensure the dataset exists + let dataset_id = zone.id; self.inner .storage - .upsert_filesystem(dataset.id, dataset.name.clone()) + .upsert_filesystem(dataset_id, dataset_name) .await?; } self.inner .services - .ensure_all_services_persistent(requested_services) + .ensure_all_omicron_zones_persistent(requested_zones) .await?; Ok(()) } diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled10.json b/sled-agent/tests/old-service-ledgers/rack2-sled10.json new file mode 100644 index 0000000000..b92a2bf4a0 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled10.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"04eef8aa-055c-42ab-bdb6-c982f63c9be0","zone_type":"crucible","addresses":["fd00:1122:3344:107::d"],"dataset":{"id":"04eef8aa-055c-42ab-bdb6-c982f63c9be0","name":{"pool_name":"oxp_845ff39a-3205-416f-8bda-e35829107c8a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::d]:32345"},"services":[{"id":"04eef8aa-055c-42ab-bdb6-c982f63c9be0","details":{"type":"crucible","address":"[fd00:1122:3344:107::d]:32345"}}]},"root":"/pool/ext/43efdd6d-7419-437a-a282-fc45bfafd042/crypt/zone"},{"zone":{"id":"8568c997-fbbb-46a8-8549-b78284530ffc","zone_type":"crucible","addresses":["fd00:1122:3344:107::5"],"dataset":{"id":"8568c997-fbbb-46a8-8549-b78284530ffc","name":{"pool_name":"oxp_0e485ad3-04e6-404b-b619-87d4fea9f5ae","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::5]:32345"},"services":[{"id":"8568c997-fbbb-46a8-8549-b78284530ffc","details":{"type":"crucible","address":"[fd00:1122:3344:107::5]:32345"}}]},"root":"/pool/ext/9b61d4b2-66f6-459f-86f4-13d0b8c5d6cf/crypt/zone"},{"zone":{"id":"6cec1d60-5c1a-4c1b-9632-2b4bc76bd37c","zone_type":"crucible","addresses":["fd00:1122:3344:107::e"],"dataset":{"id":"6cec1d60-5c1a-4c1b-9632-2b4bc76bd37c","name":{"pool_name":"oxp_62a4c68a-2073-42d0-8e49-01f5e8b90cd4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::e]:32345"},"services":[{"id":"6cec1d60-5c1a-4c1b-9632-2b4bc76bd37c","details":{"type":"crucible","address":"[fd00:1122:3344:107::e]:32345"}}]},"root":"/pool/ext/845ff39a-3205-416f-8bda-e35829107c8a/crypt/zone"},{"zone":{"id":"aa646c82-c6d7-4d0c-8401-150130927759","zone_type":"clickhouse","addresses":["fd00:1122:3344:107::4"],"dataset":{"id":"aa646c82-c6d7-4d0c-8401-150130927759","name":{"pool_name":"oxp_0e485ad3-04e6-404b-b619-87d4fea9f5ae","kind":{"type":"clickhouse"}},"service_address":"[fd00:1122:3344:107::4]:8123"},"services":[{"id":"aa646c82-c6d7-4d0c-84
01-150130927759","details":{"type":"clickhouse","address":"[fd00:1122:3344:107::4]:8123"}}]},"root":"/pool/ext/fd82dcc7-00dd-4d01-826a-937a7d8238fb/crypt/zone"},{"zone":{"id":"2f294ca1-7a4f-468f-8966-2b7915804729","zone_type":"crucible","addresses":["fd00:1122:3344:107::7"],"dataset":{"id":"2f294ca1-7a4f-468f-8966-2b7915804729","name":{"pool_name":"oxp_43efdd6d-7419-437a-a282-fc45bfafd042","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::7]:32345"},"services":[{"id":"2f294ca1-7a4f-468f-8966-2b7915804729","details":{"type":"crucible","address":"[fd00:1122:3344:107::7]:32345"}}]},"root":"/pool/ext/fd82dcc7-00dd-4d01-826a-937a7d8238fb/crypt/zone"},{"zone":{"id":"1a77bd1d-4fd4-4d6c-a105-17f942d94ba6","zone_type":"crucible","addresses":["fd00:1122:3344:107::c"],"dataset":{"id":"1a77bd1d-4fd4-4d6c-a105-17f942d94ba6","name":{"pool_name":"oxp_b6bdfdaf-9c0d-4b74-926c-49ff3ed05562","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::c]:32345"},"services":[{"id":"1a77bd1d-4fd4-4d6c-a105-17f942d94ba6","details":{"type":"crucible","address":"[fd00:1122:3344:107::c]:32345"}}]},"root":"/pool/ext/9b61d4b2-66f6-459f-86f4-13d0b8c5d6cf/crypt/zone"},{"zone":{"id":"f65a6668-1aea-4deb-81ed-191fbe469328","zone_type":"crucible","addresses":["fd00:1122:3344:107::9"],"dataset":{"id":"f65a6668-1aea-4deb-81ed-191fbe469328","name":{"pool_name":"oxp_9b61d4b2-66f6-459f-86f4-13d0b8c5d6cf","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::9]:32345"},"services":[{"id":"f65a6668-1aea-4deb-81ed-191fbe469328","details":{"type":"crucible","address":"[fd00:1122:3344:107::9]:32345"}}]},"root":"/pool/ext/d0584f4a-20ba-436d-a75b-7709e80deb79/crypt/zone"},{"zone":{"id":"ee8bce67-8f8e-4221-97b0-85f1860d66d0","zone_type":"crucible","addresses":["fd00:1122:3344:107::8"],"dataset":{"id":"ee8bce67-8f8e-4221-97b0-85f1860d66d0","name":{"pool_name":"oxp_b252b176-3974-436a-915b-60382b21eb76","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::8]:32345"},"services":[{"id":"ee8bce67-8f8e-4221-97b0-85f1860d66d0","details":{"type":"crucible","address":"[fd00:1122:3344:107::8]:32345"}}]},"root":"/pool/ext/b6bdfdaf-9c0d-4b74-926c-49ff3ed05562/crypt/zone"},{"zone":{"id":"cf3b2d54-5e36-4c93-b44f-8bf36ac98071","zone_type":"crucible","addresses":["fd00:1122:3344:107::b"],"dataset":{"id":"cf3b2d54-5e36-4c93-b44f-8bf36ac98071","name":{"pool_name":"oxp_d0584f4a-20ba-436d-a75b-7709e80deb79","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::b]:32345"},"services":[{"id":"cf3b2d54-5e36-4c93-b44f-8bf36ac98071","details":{"type":"crucible","address":"[fd00:1122:3344:107::b]:32345"}}]},"root":"/pool/ext/4c157f35-865d-4310-9d81-c6259cb69293/crypt/zone"},{"zone":{"id":"5c8c244c-00dc-4b16-aa17-6d9eb4827fab","zone_type":"crucible","addresses":["fd00:1122:3344:107::a"],"dataset":{"id":"5c8c244c-00dc-4b16-aa17-6d9eb4827fab","name":{"pool_name":"oxp_4c157f35-865d-4310-9d81-c6259cb69293","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::a]:32345"},"services":[{"id":"5c8c244c-00dc-4b16-aa17-6d9eb4827fab","details":{"type":"crucible","address":"[fd00:1122:3344:107::a]:32345"}}]},"root":"/pool/ext/845ff39a-3205-416f-8bda-e35829107c8a/crypt/zone"},{"zone":{"id":"7d5e942b-926c-442d-937a-76cc4aa72bf3","zone_type":"crucible","addresses":["fd00:1122:3344:107::6"],"dataset":{"id":"7d5e942b-926c-442d-937a-76cc4aa72bf3","name":{"pool_name":"oxp_fd82dcc7-00dd-4d01-826a-937a7d8238fb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::6]:32345"},"services":[{"id"
:"7d5e942b-926c-442d-937a-76cc4aa72bf3","details":{"type":"crucible","address":"[fd00:1122:3344:107::6]:32345"}}]},"root":"/pool/ext/b252b176-3974-436a-915b-60382b21eb76/crypt/zone"},{"zone":{"id":"a3628a56-6f85-43b5-be50-71d8f0e04877","zone_type":"cockroach_db","addresses":["fd00:1122:3344:107::3"],"dataset":{"id":"a3628a56-6f85-43b5-be50-71d8f0e04877","name":{"pool_name":"oxp_0e485ad3-04e6-404b-b619-87d4fea9f5ae","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:107::3]:32221"},"services":[{"id":"a3628a56-6f85-43b5-be50-71d8f0e04877","details":{"type":"cockroach_db","address":"[fd00:1122:3344:107::3]:32221"}}]},"root":"/pool/ext/4c157f35-865d-4310-9d81-c6259cb69293/crypt/zone"},{"zone":{"id":"7529be1c-ca8b-441a-89aa-37166cc450df","zone_type":"ntp","addresses":["fd00:1122:3344:107::f"],"dataset":null,"services":[{"id":"7529be1c-ca8b-441a-89aa-37166cc450df","details":{"type":"internal_ntp","address":"[fd00:1122:3344:107::f]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/fd82dcc7-00dd-4d01-826a-937a7d8238fb/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled11.json b/sled-agent/tests/old-service-ledgers/rack2-sled11.json new file mode 100644 index 0000000000..3833bed5c9 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled11.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"605be8b9-c652-4a5f-94ca-068ec7a39472","zone_type":"crucible","addresses":["fd00:1122:3344:106::a"],"dataset":{"id":"605be8b9-c652-4a5f-94ca-068ec7a39472","name":{"pool_name":"oxp_cf14d1b9-b4db-4594-b3ab-a9957e770ce9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::a]:32345"},"services":[{"id":"605be8b9-c652-4a5f-94ca-068ec7a39472","details":{"type":"crucible","address":"[fd00:1122:3344:106::a]:32345"}}]},"root":"/pool/ext/cf5f8849-0c5a-475b-8683-6d17da88d1d1/crypt/zone"},{"zone":{"id":"af8a8712-457c-4ea7-a8b6-aecb04761c1b","zone_type":"crucible","addresses":["fd00:1122:3344:106::9"],"dataset":{"id":"af8a8712-457c-4ea7-a8b6-aecb04761c1b","name":{"pool_name":"oxp_cf5f8849-0c5a-475b-8683-6d17da88d1d1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::9]:32345"},"services":[{"id":"af8a8712-457c-4ea7-a8b6-aecb04761c1b","details":{"type":"crucible","address":"[fd00:1122:3344:106::9]:32345"}}]},"root":"/pool/ext/7f778610-7328-4554-98f6-b17f74f551c7/crypt/zone"},{"zone":{"id":"0022703b-dcfc-44d4-897a-b42f6f53b433","zone_type":"crucible","addresses":["fd00:1122:3344:106::c"],"dataset":{"id":"0022703b-dcfc-44d4-897a-b42f6f53b433","name":{"pool_name":"oxp_025725fa-9e40-4b46-b018-c420408394ef","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::c]:32345"},"services":[{"id":"0022703b-dcfc-44d4-897a-b42f6f53b433","details":{"type":"crucible","address":"[fd00:1122:3344:106::c]:32345"}}]},"root":"/pool/ext/025725fa-9e40-4b46-b018-c420408394ef/crypt/zone"},{"zone":{"id":"fffddf56-10ca-4b62-9be3-5b3764a5f682","zone_type":"crucible","addresses":["fd00:1122:3344:106::d"],"dataset":{"id":"fffddf56-10ca-4b62-9be3-5b3764a5f682","name":{"pool_name":"oxp_4d2f5aaf-eb14-4b1e-aa99-ae38ec844605","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::d]:32345"},"services":[{"id":"fffddf56-10ca-4b62-9be3-5b3764a5f682","details":{"type":"crucible","address":"[
fd00:1122:3344:106::d]:32345"}}]},"root":"/pool/ext/834c9aad-c53b-4357-bc3f-f422efa63848/crypt/zone"},{"zone":{"id":"9b8194ee-917d-4abc-a55c-94cea6cdaea1","zone_type":"crucible","addresses":["fd00:1122:3344:106::6"],"dataset":{"id":"9b8194ee-917d-4abc-a55c-94cea6cdaea1","name":{"pool_name":"oxp_d7665e0d-9354-4341-a76f-965d7c49f277","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::6]:32345"},"services":[{"id":"9b8194ee-917d-4abc-a55c-94cea6cdaea1","details":{"type":"crucible","address":"[fd00:1122:3344:106::6]:32345"}}]},"root":"/pool/ext/cf5f8849-0c5a-475b-8683-6d17da88d1d1/crypt/zone"},{"zone":{"id":"b369e133-485c-4d98-8fee-83542d1fd94d","zone_type":"crucible","addresses":["fd00:1122:3344:106::4"],"dataset":{"id":"b369e133-485c-4d98-8fee-83542d1fd94d","name":{"pool_name":"oxp_4366f80d-3902-4b93-8f2d-380008e805fc","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::4]:32345"},"services":[{"id":"b369e133-485c-4d98-8fee-83542d1fd94d","details":{"type":"crucible","address":"[fd00:1122:3344:106::4]:32345"}}]},"root":"/pool/ext/025725fa-9e40-4b46-b018-c420408394ef/crypt/zone"},{"zone":{"id":"edd99650-5df1-4241-815d-253e4ef2399c","zone_type":"external_dns","addresses":["fd00:1122:3344:106::3"],"dataset":{"id":"edd99650-5df1-4241-815d-253e4ef2399c","name":{"pool_name":"oxp_4366f80d-3902-4b93-8f2d-380008e805fc","kind":{"type":"external_dns"}},"service_address":"[fd00:1122:3344:106::3]:5353"},"services":[{"id":"edd99650-5df1-4241-815d-253e4ef2399c","details":{"type":"external_dns","http_address":"[fd00:1122:3344:106::3]:5353","dns_address":"172.20.26.1:53","nic":{"id":"99b759fc-8e2e-44b7-aca8-93c3b201974d","kind":{"type":"service","id":"edd99650-5df1-4241-815d-253e4ef2399c"},"name":"external-dns-edd99650-5df1-4241-815d-253e4ef2399c","ip":"172.30.1.5","mac":"A8:40:25:FF:B0:9C","subnet":"172.30.1.0/24","vni":100,"primary":true,"slot":0}}}]},"root":"/pool/ext/7f778610-7328-4554-98f6-b17f74f551c7/crypt/zone"},{"zone":{"id":"46d1afcc-cc3f-4b17-aafc-054dd4862d15","zone_type":"crucible","addresses":["fd00:1122:3344:106::5"],"dataset":{"id":"46d1afcc-cc3f-4b17-aafc-054dd4862d15","name":{"pool_name":"oxp_7f778610-7328-4554-98f6-b17f74f551c7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::5]:32345"},"services":[{"id":"46d1afcc-cc3f-4b17-aafc-054dd4862d15","details":{"type":"crucible","address":"[fd00:1122:3344:106::5]:32345"}}]},"root":"/pool/ext/cf5f8849-0c5a-475b-8683-6d17da88d1d1/crypt/zone"},{"zone":{"id":"12afe1c3-bfe6-4278-8240-91d401347d36","zone_type":"crucible","addresses":["fd00:1122:3344:106::8"],"dataset":{"id":"12afe1c3-bfe6-4278-8240-91d401347d36","name":{"pool_name":"oxp_534bcd4b-502f-4109-af6e-4b28a22c20f1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::8]:32345"},"services":[{"id":"12afe1c3-bfe6-4278-8240-91d401347d36","details":{"type":"crucible","address":"[fd00:1122:3344:106::8]:32345"}}]},"root":"/pool/ext/4366f80d-3902-4b93-8f2d-380008e805fc/crypt/zone"},{"zone":{"id":"c33b5912-9985-43ed-98f2-41297e2b796a","zone_type":"crucible","addresses":["fd00:1122:3344:106::b"],"dataset":{"id":"c33b5912-9985-43ed-98f2-41297e2b796a","name":{"pool_name":"oxp_834c9aad-c53b-4357-bc3f-f422efa63848","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::b]:32345"},"services":[{"id":"c33b5912-9985-43ed-98f2-41297e2b796a","details":{"type":"crucible","address":"[fd00:1122:3344:106::b]:32345"}}]},"root":"/pool/ext/d7665e0d-9354-4341-a76f-965d7c49f277/crypt/zone"},{"zone":{"id":"65b3db59-9361-4100-9cee-04e32a8c67d3","zon
e_type":"crucible","addresses":["fd00:1122:3344:106::7"],"dataset":{"id":"65b3db59-9361-4100-9cee-04e32a8c67d3","name":{"pool_name":"oxp_32b5303f-f667-4345-84d2-c7eec63b91b2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::7]:32345"},"services":[{"id":"65b3db59-9361-4100-9cee-04e32a8c67d3","details":{"type":"crucible","address":"[fd00:1122:3344:106::7]:32345"}}]},"root":"/pool/ext/d7665e0d-9354-4341-a76f-965d7c49f277/crypt/zone"},{"zone":{"id":"82500cc9-f33d-4d59-9e6e-d70ea6133077","zone_type":"ntp","addresses":["fd00:1122:3344:106::e"],"dataset":null,"services":[{"id":"82500cc9-f33d-4d59-9e6e-d70ea6133077","details":{"type":"internal_ntp","address":"[fd00:1122:3344:106::e]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/cf14d1b9-b4db-4594-b3ab-a9957e770ce9/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled12.json b/sled-agent/tests/old-service-ledgers/rack2-sled12.json new file mode 100644 index 0000000000..5126c007f3 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled12.json @@ -0,0 +1 @@ +{"generation":5,"requests":[{"zone":{"id":"a76b3357-b690-43b8-8352-3300568ffc2b","zone_type":"crucible","addresses":["fd00:1122:3344:104::a"],"dataset":{"id":"a76b3357-b690-43b8-8352-3300568ffc2b","name":{"pool_name":"oxp_05715ad8-59a1-44ab-ad5f-0cdffb46baab","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::a]:32345"},"services":[{"id":"a76b3357-b690-43b8-8352-3300568ffc2b","details":{"type":"crucible","address":"[fd00:1122:3344:104::a]:32345"}}]},"root":"/pool/ext/2ec2a731-3340-4777-b1bb-4a906c598174/crypt/zone"},{"zone":{"id":"8d202759-ca06-4383-b50f-7f3ec4062bf7","zone_type":"crucible","addresses":["fd00:1122:3344:104::4"],"dataset":{"id":"8d202759-ca06-4383-b50f-7f3ec4062bf7","name":{"pool_name":"oxp_56e32a8f-0877-4437-9cab-94a4928b1495","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::4]:32345"},"services":[{"id":"8d202759-ca06-4383-b50f-7f3ec4062bf7","details":{"type":"crucible","address":"[fd00:1122:3344:104::4]:32345"}}]},"root":"/pool/ext/613b58fc-5a80-42dc-a61c-b143cf220fb5/crypt/zone"},{"zone":{"id":"fcdda266-fc6a-4518-89db-aec007a4b682","zone_type":"crucible","addresses":["fd00:1122:3344:104::b"],"dataset":{"id":"fcdda266-fc6a-4518-89db-aec007a4b682","name":{"pool_name":"oxp_7e1293ad-b903-4054-aeae-2182d5e4a785","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::b]:32345"},"services":[{"id":"fcdda266-fc6a-4518-89db-aec007a4b682","details":{"type":"crucible","address":"[fd00:1122:3344:104::b]:32345"}}]},"root":"/pool/ext/416fd29e-d3b5-4fdf-8101-d0d163fa0706/crypt/zone"},{"zone":{"id":"167cf6a2-ec51-4de2-bc6c-7785bbc0e436","zone_type":"crucible","addresses":["fd00:1122:3344:104::c"],"dataset":{"id":"167cf6a2-ec51-4de2-bc6c-7785bbc0e436","name":{"pool_name":"oxp_f96c8d49-fdf7-4bd6-84f6-c282202d1abc","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::c]:32345"},"services":[{"id":"167cf6a2-ec51-4de2-bc6c-7785bbc0e436","details":{"type":"crucible","address":"[fd00:1122:3344:104::c]:32345"}}]},"root":"/pool/ext/56e32a8f-0877-4437-9cab-94a4928b1495/crypt/zone"},{"zone":{"id":"c6fde82d-8dae-4ef0-b557-6c3d094d9454","zone_type":"crucible","addresses":["fd00:1122:3344:104::9"],"dataset":{"id":"c6fde82d-8dae-4ef0-
b557-6c3d094d9454","name":{"pool_name":"oxp_416fd29e-d3b5-4fdf-8101-d0d163fa0706","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::9]:32345"},"services":[{"id":"c6fde82d-8dae-4ef0-b557-6c3d094d9454","details":{"type":"crucible","address":"[fd00:1122:3344:104::9]:32345"}}]},"root":"/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone"},{"zone":{"id":"650f5da7-86a0-4ade-af0f-bc96e021ded0","zone_type":"crucible","addresses":["fd00:1122:3344:104::5"],"dataset":{"id":"650f5da7-86a0-4ade-af0f-bc96e021ded0","name":{"pool_name":"oxp_b4a71d3d-1ecd-418a-9a52-8d118f82082b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::5]:32345"},"services":[{"id":"650f5da7-86a0-4ade-af0f-bc96e021ded0","details":{"type":"crucible","address":"[fd00:1122:3344:104::5]:32345"}}]},"root":"/pool/ext/613b58fc-5a80-42dc-a61c-b143cf220fb5/crypt/zone"},{"zone":{"id":"7ce9a2c5-2d37-4188-b7b5-a9db819396c3","zone_type":"crucible","addresses":["fd00:1122:3344:104::d"],"dataset":{"id":"7ce9a2c5-2d37-4188-b7b5-a9db819396c3","name":{"pool_name":"oxp_c87d16b8-e814-4159-8562-f8d7fdd19d13","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::d]:32345"},"services":[{"id":"7ce9a2c5-2d37-4188-b7b5-a9db819396c3","details":{"type":"crucible","address":"[fd00:1122:3344:104::d]:32345"}}]},"root":"/pool/ext/416fd29e-d3b5-4fdf-8101-d0d163fa0706/crypt/zone"},{"zone":{"id":"23e1cf01-70ab-422f-997b-6216158965c3","zone_type":"crucible","addresses":["fd00:1122:3344:104::8"],"dataset":{"id":"23e1cf01-70ab-422f-997b-6216158965c3","name":{"pool_name":"oxp_3af01cc4-1f16-47d9-a489-abafcb91c2db","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::8]:32345"},"services":[{"id":"23e1cf01-70ab-422f-997b-6216158965c3","details":{"type":"crucible","address":"[fd00:1122:3344:104::8]:32345"}}]},"root":"/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone"},{"zone":{"id":"50209816-89fb-48ed-9595-16899d114844","zone_type":"crucible","addresses":["fd00:1122:3344:104::6"],"dataset":{"id":"50209816-89fb-48ed-9595-16899d114844","name":{"pool_name":"oxp_2ec2a731-3340-4777-b1bb-4a906c598174","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::6]:32345"},"services":[{"id":"50209816-89fb-48ed-9595-16899d114844","details":{"type":"crucible","address":"[fd00:1122:3344:104::6]:32345"}}]},"root":"/pool/ext/416fd29e-d3b5-4fdf-8101-d0d163fa0706/crypt/zone"},{"zone":{"id":"20b100d0-84c3-4119-aa9b-0c632b0b6a3a","zone_type":"nexus","addresses":["fd00:1122:3344:104::3"],"dataset":null,"services":[{"id":"20b100d0-84c3-4119-aa9b-0c632b0b6a3a","details":{"type":"nexus","internal_address":"[fd00:1122:3344:104::3]:12221","external_ip":"172.20.26.4","nic":{"id":"364b0ecd-bf08-4cac-a993-bbf4a70564c7","kind":{"type":"service","id":"20b100d0-84c3-4119-aa9b-0c632b0b6a3a"},"name":"nexus-20b100d0-84c3-4119-aa9b-0c632b0b6a3a","ip":"172.30.2.6","mac":"A8:40:25:FF:B4:C1","subnet":"172.30.2.0/24","vni":100,"primary":true,"slot":0},"external_tls":true,"external_dns_servers":["1.1.1.1","9.9.9.9"]}}]},"root":"/pool/ext/c87d16b8-e814-4159-8562-f8d7fdd19d13/crypt/zone"},{"zone":{"id":"8bc0f29e-0c20-437e-b8ca-7b9844acda22","zone_type":"crucible","addresses":["fd00:1122:3344:104::7"],"dataset":{"id":"8bc0f29e-0c20-437e-b8ca-7b9844acda22","name":{"pool_name":"oxp_613b58fc-5a80-42dc-a61c-b143cf220fb5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::7]:32345"},"services":[{"id":"8bc0f29e-0c20-437e-b8ca-7b9844acda22","details":{"type":"crucible","address":"[fd00:1122:3344:104::7]:32345"}}]},"root
":"/pool/ext/56e32a8f-0877-4437-9cab-94a4928b1495/crypt/zone"},{"zone":{"id":"c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55","zone_type":"ntp","addresses":["fd00:1122:3344:104::e"],"dataset":null,"services":[{"id":"c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55","details":{"type":"boundary_ntp","address":"[fd00:1122:3344:104::e]:123","ntp_servers":["ntp.eng.oxide.computer"],"dns_servers":["1.1.1.1","9.9.9.9"],"domain":null,"nic":{"id":"a4b9bacf-6c04-431a-81ad-9bf0302af96e","kind":{"type":"service","id":"c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55"},"name":"ntp-c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55","ip":"172.30.3.5","mac":"A8:40:25:FF:B2:52","subnet":"172.30.3.0/24","vni":100,"primary":true,"slot":0},"snat_cfg":{"ip":"172.20.26.6","first_port":0,"last_port":16383}}}]},"root":"/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone"},{"zone":{"id":"51c9ad09-7814-4643-8ad4-689ccbe53fbd","zone_type":"internal_dns","addresses":["fd00:1122:3344:1::1"],"dataset":{"id":"51c9ad09-7814-4643-8ad4-689ccbe53fbd","name":{"pool_name":"oxp_56e32a8f-0877-4437-9cab-94a4928b1495","kind":{"type":"internal_dns"}},"service_address":"[fd00:1122:3344:1::1]:5353"},"services":[{"id":"51c9ad09-7814-4643-8ad4-689ccbe53fbd","details":{"type":"internal_dns","http_address":"[fd00:1122:3344:1::1]:5353","dns_address":"[fd00:1122:3344:1::1]:53","gz_address":"fd00:1122:3344:1::2","gz_address_index":0}}]},"root":"/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled14.json b/sled-agent/tests/old-service-ledgers/rack2-sled14.json new file mode 100644 index 0000000000..421e21d84d --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled14.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"ee8b2cfa-87fe-46a6-98ef-23640b80a968","zone_type":"crucible","addresses":["fd00:1122:3344:10b::d"],"dataset":{"id":"ee8b2cfa-87fe-46a6-98ef-23640b80a968","name":{"pool_name":"oxp_4a624324-003a-4255-98e8-546a90b5b7fa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::d]:32345"},"services":[{"id":"ee8b2cfa-87fe-46a6-98ef-23640b80a968","details":{"type":"crucible","address":"[fd00:1122:3344:10b::d]:32345"}}]},"root":"/pool/ext/6b9ec5f1-859f-459c-9c06-6a51ba87786f/crypt/zone"},{"zone":{"id":"9228f8ca-2a83-439f-9cb7-f2801b5fea27","zone_type":"crucible","addresses":["fd00:1122:3344:10b::6"],"dataset":{"id":"9228f8ca-2a83-439f-9cb7-f2801b5fea27","name":{"pool_name":"oxp_6b9ec5f1-859f-459c-9c06-6a51ba87786f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::6]:32345"},"services":[{"id":"9228f8ca-2a83-439f-9cb7-f2801b5fea27","details":{"type":"crucible","address":"[fd00:1122:3344:10b::6]:32345"}}]},"root":"/pool/ext/6b9ec5f1-859f-459c-9c06-6a51ba87786f/crypt/zone"},{"zone":{"id":"ee44cdde-7ac9-4469-9f1d-e8bcfeb5cc46","zone_type":"crucible","addresses":["fd00:1122:3344:10b::e"],"dataset":{"id":"ee44cdde-7ac9-4469-9f1d-e8bcfeb5cc46","name":{"pool_name":"oxp_11b02ce7-7e50-486f-86c2-de8af9575a45","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::e]:32345"},"services":[{"id":"ee44cdde-7ac9-4469-9f1d-e8bcfeb5cc46","details":{"type":"crucible","address":"[fd00:1122:3344:10b::e]:32345"}}]},"root":"/pool/ext/11b02ce7-7e50-486f-86c2-de8af9575a45/crypt/zone"},{"zone":{"id":"96bac0b1-8b34-4c81-9e76-6404d2c37630","zone_type":"crucible_pantry","addresses":["fd00:1122:3344:10b::4"],"dataset":null,"services":[{"id":"96bac0b1-8b34-4c81-9e76-6404d2c37630","details":{"type":"crucible_pantry","address":"[fd00:1122:3344:10b::4
]:17000"}}]},"root":"/pool/ext/350b2814-7b7f-40f1-9bf6-9818a1ef49bb/crypt/zone"},{"zone":{"id":"d4e1e554-7b98-4413-809e-4a42561c3d0c","zone_type":"crucible","addresses":["fd00:1122:3344:10b::a"],"dataset":{"id":"d4e1e554-7b98-4413-809e-4a42561c3d0c","name":{"pool_name":"oxp_e6d2fe1d-c74d-40cd-8fae-bc7d06bdaac8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::a]:32345"},"services":[{"id":"d4e1e554-7b98-4413-809e-4a42561c3d0c","details":{"type":"crucible","address":"[fd00:1122:3344:10b::a]:32345"}}]},"root":"/pool/ext/6b9ec5f1-859f-459c-9c06-6a51ba87786f/crypt/zone"},{"zone":{"id":"1dd69b02-a032-46c3-8e2a-5012e8314455","zone_type":"crucible","addresses":["fd00:1122:3344:10b::b"],"dataset":{"id":"1dd69b02-a032-46c3-8e2a-5012e8314455","name":{"pool_name":"oxp_350b2814-7b7f-40f1-9bf6-9818a1ef49bb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::b]:32345"},"services":[{"id":"1dd69b02-a032-46c3-8e2a-5012e8314455","details":{"type":"crucible","address":"[fd00:1122:3344:10b::b]:32345"}}]},"root":"/pool/ext/350b2814-7b7f-40f1-9bf6-9818a1ef49bb/crypt/zone"},{"zone":{"id":"921f7752-d2f3-40df-a739-5cb1390abc2c","zone_type":"crucible","addresses":["fd00:1122:3344:10b::8"],"dataset":{"id":"921f7752-d2f3-40df-a739-5cb1390abc2c","name":{"pool_name":"oxp_2d1ebe24-6deb-4f81-8450-6842de28126c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::8]:32345"},"services":[{"id":"921f7752-d2f3-40df-a739-5cb1390abc2c","details":{"type":"crucible","address":"[fd00:1122:3344:10b::8]:32345"}}]},"root":"/pool/ext/91ea7bb6-2be7-4498-9b0d-a0521509ec00/crypt/zone"},{"zone":{"id":"609b25e8-9750-4308-ae6f-7202907a3675","zone_type":"crucible","addresses":["fd00:1122:3344:10b::9"],"dataset":{"id":"609b25e8-9750-4308-ae6f-7202907a3675","name":{"pool_name":"oxp_91ea7bb6-2be7-4498-9b0d-a0521509ec00","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::9]:32345"},"services":[{"id":"609b25e8-9750-4308-ae6f-7202907a3675","details":{"type":"crucible","address":"[fd00:1122:3344:10b::9]:32345"}}]},"root":"/pool/ext/2d1ebe24-6deb-4f81-8450-6842de28126c/crypt/zone"},{"zone":{"id":"a232eba2-e94f-4592-a5a6-ec23f9be3296","zone_type":"crucible","addresses":["fd00:1122:3344:10b::5"],"dataset":{"id":"a232eba2-e94f-4592-a5a6-ec23f9be3296","name":{"pool_name":"oxp_e12f29b8-1ab8-431e-bc96-1c1298947980","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::5]:32345"},"services":[{"id":"a232eba2-e94f-4592-a5a6-ec23f9be3296","details":{"type":"crucible","address":"[fd00:1122:3344:10b::5]:32345"}}]},"root":"/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/zone"},{"zone":{"id":"800d1758-9312-4b1a-8f02-dc6d644c2a9b","zone_type":"crucible","addresses":["fd00:1122:3344:10b::c"],"dataset":{"id":"800d1758-9312-4b1a-8f02-dc6d644c2a9b","name":{"pool_name":"oxp_b6932bb0-bab8-4876-914a-9c75a600e794","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::c]:32345"},"services":[{"id":"800d1758-9312-4b1a-8f02-dc6d644c2a9b","details":{"type":"crucible","address":"[fd00:1122:3344:10b::c]:32345"}}]},"root":"/pool/ext/b6932bb0-bab8-4876-914a-9c75a600e794/crypt/zone"},{"zone":{"id":"668a4d4a-96dc-4b45-866b-bed3d64c26ec","zone_type":"crucible","addresses":["fd00:1122:3344:10b::7"],"dataset":{"id":"668a4d4a-96dc-4b45-866b-bed3d64c26ec","name":{"pool_name":"oxp_021afd19-2f87-4def-9284-ab7add1dd6ae","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::7]:32345"},"services":[{"id":"668a4d4a-96dc-4b45-866b-bed3d64c26ec","details":{"type":"crucible","address":"
[fd00:1122:3344:10b::7]:32345"}}]},"root":"/pool/ext/91ea7bb6-2be7-4498-9b0d-a0521509ec00/crypt/zone"},{"zone":{"id":"8bbea076-ff60-4330-8302-383e18140ef3","zone_type":"cockroach_db","addresses":["fd00:1122:3344:10b::3"],"dataset":{"id":"8bbea076-ff60-4330-8302-383e18140ef3","name":{"pool_name":"oxp_e12f29b8-1ab8-431e-bc96-1c1298947980","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:10b::3]:32221"},"services":[{"id":"8bbea076-ff60-4330-8302-383e18140ef3","details":{"type":"cockroach_db","address":"[fd00:1122:3344:10b::3]:32221"}}]},"root":"/pool/ext/4a624324-003a-4255-98e8-546a90b5b7fa/crypt/zone"},{"zone":{"id":"3ccea933-89f2-4ce5-8367-efb0afeffe97","zone_type":"ntp","addresses":["fd00:1122:3344:10b::f"],"dataset":null,"services":[{"id":"3ccea933-89f2-4ce5-8367-efb0afeffe97","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10b::f]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/4a624324-003a-4255-98e8-546a90b5b7fa/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled16.json b/sled-agent/tests/old-service-ledgers/rack2-sled16.json new file mode 100644 index 0000000000..c928e004b2 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled16.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"b12aa520-a769-4eac-b56b-09960550a831","zone_type":"crucible","addresses":["fd00:1122:3344:108::7"],"dataset":{"id":"b12aa520-a769-4eac-b56b-09960550a831","name":{"pool_name":"oxp_34dadf3f-f60c-4acc-b82b-4b0c82224222","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::7]:32345"},"services":[{"id":"b12aa520-a769-4eac-b56b-09960550a831","details":{"type":"crucible","address":"[fd00:1122:3344:108::7]:32345"}}]},"root":"/pool/ext/8be8c577-23ac-452e-a205-6d9c95088f61/crypt/zone"},{"zone":{"id":"9bdc40ee-ccba-4d18-9efb-a30596e2d290","zone_type":"crucible","addresses":["fd00:1122:3344:108::d"],"dataset":{"id":"9bdc40ee-ccba-4d18-9efb-a30596e2d290","name":{"pool_name":"oxp_eb81728c-3b83-42fb-8133-ac32a0bdf70f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::d]:32345"},"services":[{"id":"9bdc40ee-ccba-4d18-9efb-a30596e2d290","details":{"type":"crucible","address":"[fd00:1122:3344:108::d]:32345"}}]},"root":"/pool/ext/8be8c577-23ac-452e-a205-6d9c95088f61/crypt/zone"},{"zone":{"id":"c9a367c7-64d7-48e4-b484-9ecb4e8faea7","zone_type":"crucible","addresses":["fd00:1122:3344:108::9"],"dataset":{"id":"c9a367c7-64d7-48e4-b484-9ecb4e8faea7","name":{"pool_name":"oxp_76ab5a67-e20f-4bf0-87b3-01fcc4144bd2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::9]:32345"},"services":[{"id":"c9a367c7-64d7-48e4-b484-9ecb4e8faea7","details":{"type":"crucible","address":"[fd00:1122:3344:108::9]:32345"}}]},"root":"/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone"},{"zone":{"id":"bc5124d8-65e8-4879-bfac-64d59003d482","zone_type":"crucible","addresses":["fd00:1122:3344:108::a"],"dataset":{"id":"bc5124d8-65e8-4879-bfac-64d59003d482","name":{"pool_name":"oxp_5fac7a1d-e855-46e1-b8c2-dd848ac4fee6","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::a]:32345"},"services":[{"id":"bc5124d8-65e8-4879-bfac-64d59003d482","details":{"type":"crucible","address":"[fd00:1122:3344:108::a]:32345"}}]},"root":"/pool/ext/0c4ef358-5533-43db-ad38-a8ef
f716e53a/crypt/zone"},{"zone":{"id":"5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a","zone_type":"crucible","addresses":["fd00:1122:3344:108::c"],"dataset":{"id":"5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a","name":{"pool_name":"oxp_0c4ef358-5533-43db-ad38-a8eff716e53a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::c]:32345"},"services":[{"id":"5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a","details":{"type":"crucible","address":"[fd00:1122:3344:108::c]:32345"}}]},"root":"/pool/ext/6d3e9cc6-f03b-4055-9785-05711d5e4fdc/crypt/zone"},{"zone":{"id":"3b767edf-a72d-4d80-a0fc-65d6801ed0e0","zone_type":"crucible","addresses":["fd00:1122:3344:108::e"],"dataset":{"id":"3b767edf-a72d-4d80-a0fc-65d6801ed0e0","name":{"pool_name":"oxp_f522118c-5dcd-4116-8044-07f0cceec52e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::e]:32345"},"services":[{"id":"3b767edf-a72d-4d80-a0fc-65d6801ed0e0","details":{"type":"crucible","address":"[fd00:1122:3344:108::e]:32345"}}]},"root":"/pool/ext/5fac7a1d-e855-46e1-b8c2-dd848ac4fee6/crypt/zone"},{"zone":{"id":"f3c02ed6-fbc5-45c3-a030-409f74b450fd","zone_type":"crucible_pantry","addresses":["fd00:1122:3344:108::4"],"dataset":null,"services":[{"id":"f3c02ed6-fbc5-45c3-a030-409f74b450fd","details":{"type":"crucible_pantry","address":"[fd00:1122:3344:108::4]:17000"}}]},"root":"/pool/ext/eb81728c-3b83-42fb-8133-ac32a0bdf70f/crypt/zone"},{"zone":{"id":"85bd9bdb-1ec5-4a8d-badb-8b5d502546a1","zone_type":"crucible","addresses":["fd00:1122:3344:108::5"],"dataset":{"id":"85bd9bdb-1ec5-4a8d-badb-8b5d502546a1","name":{"pool_name":"oxp_416232c1-bc8f-403f-bacb-28403dd8fced","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::5]:32345"},"services":[{"id":"85bd9bdb-1ec5-4a8d-badb-8b5d502546a1","details":{"type":"crucible","address":"[fd00:1122:3344:108::5]:32345"}}]},"root":"/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone"},{"zone":{"id":"d2f1c3df-d4e0-4469-b50e-f1871da86ebf","zone_type":"crucible","addresses":["fd00:1122:3344:108::6"],"dataset":{"id":"d2f1c3df-d4e0-4469-b50e-f1871da86ebf","name":{"pool_name":"oxp_6d3e9cc6-f03b-4055-9785-05711d5e4fdc","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::6]:32345"},"services":[{"id":"d2f1c3df-d4e0-4469-b50e-f1871da86ebf","details":{"type":"crucible","address":"[fd00:1122:3344:108::6]:32345"}}]},"root":"/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone"},{"zone":{"id":"88fe3c12-4c55-47df-b4ee-ed26b795439d","zone_type":"crucible","addresses":["fd00:1122:3344:108::8"],"dataset":{"id":"88fe3c12-4c55-47df-b4ee-ed26b795439d","name":{"pool_name":"oxp_8be8c577-23ac-452e-a205-6d9c95088f61","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::8]:32345"},"services":[{"id":"88fe3c12-4c55-47df-b4ee-ed26b795439d","details":{"type":"crucible","address":"[fd00:1122:3344:108::8]:32345"}}]},"root":"/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone"},{"zone":{"id":"4d20175a-588b-44b8-8b9c-b16c6c3a97a0","zone_type":"crucible","addresses":["fd00:1122:3344:108::b"],"dataset":{"id":"4d20175a-588b-44b8-8b9c-b16c6c3a97a0","name":{"pool_name":"oxp_a726cacd-fa35-4ed2-ade6-31ad928b24cb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::b]:32345"},"services":[{"id":"4d20175a-588b-44b8-8b9c-b16c6c3a97a0","details":{"type":"crucible","address":"[fd00:1122:3344:108::b]:32345"}}]},"root":"/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone"},{"zone":{"id":"e86845b5-eabd-49f5-9a10-6dfef9066209","zone_type":"cockroach_db","addresses":["fd00:1122:3344:108::3"],"datas
et":{"id":"e86845b5-eabd-49f5-9a10-6dfef9066209","name":{"pool_name":"oxp_416232c1-bc8f-403f-bacb-28403dd8fced","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:108::3]:32221"},"services":[{"id":"e86845b5-eabd-49f5-9a10-6dfef9066209","details":{"type":"cockroach_db","address":"[fd00:1122:3344:108::3]:32221"}}]},"root":"/pool/ext/416232c1-bc8f-403f-bacb-28403dd8fced/crypt/zone"},{"zone":{"id":"209b6213-588b-43b6-a89b-19ee5c84ffba","zone_type":"ntp","addresses":["fd00:1122:3344:108::f"],"dataset":null,"services":[{"id":"209b6213-588b-43b6-a89b-19ee5c84ffba","details":{"type":"internal_ntp","address":"[fd00:1122:3344:108::f]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/416232c1-bc8f-403f-bacb-28403dd8fced/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled17.json b/sled-agent/tests/old-service-ledgers/rack2-sled17.json new file mode 100644 index 0000000000..93872adf13 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled17.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"90b53c3d-42fa-4ca9-bbfc-96fff245b508","zone_type":"crucible","addresses":["fd00:1122:3344:109::4"],"dataset":{"id":"90b53c3d-42fa-4ca9-bbfc-96fff245b508","name":{"pool_name":"oxp_ae56280b-17ce-4266-8573-e1da9db6c6bb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::4]:32345"},"services":[{"id":"90b53c3d-42fa-4ca9-bbfc-96fff245b508","details":{"type":"crucible","address":"[fd00:1122:3344:109::4]:32345"}}]},"root":"/pool/ext/b0e1a261-b932-47c4-81e9-1977275ae9d9/crypt/zone"},{"zone":{"id":"4f9f2e1d-be04-4e8b-a50b-ffb18557a650","zone_type":"crucible","addresses":["fd00:1122:3344:109::5"],"dataset":{"id":"4f9f2e1d-be04-4e8b-a50b-ffb18557a650","name":{"pool_name":"oxp_d5b07362-64db-4b18-a3e9-8d7cbabae2d5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::5]:32345"},"services":[{"id":"4f9f2e1d-be04-4e8b-a50b-ffb18557a650","details":{"type":"crucible","address":"[fd00:1122:3344:109::5]:32345"}}]},"root":"/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone"},{"zone":{"id":"2fa5671d-3109-4f11-ae70-1280f4fa3b89","zone_type":"crucible","addresses":["fd00:1122:3344:109::6"],"dataset":{"id":"2fa5671d-3109-4f11-ae70-1280f4fa3b89","name":{"pool_name":"oxp_9ba7bfbf-b9a2-4237-a142-94c1e68de984","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::6]:32345"},"services":[{"id":"2fa5671d-3109-4f11-ae70-1280f4fa3b89","details":{"type":"crucible","address":"[fd00:1122:3344:109::6]:32345"}}]},"root":"/pool/ext/3cafbb47-c194-4a42-99ff-34dfeab999ed/crypt/zone"},{"zone":{"id":"b63c6882-ca90-4156-b561-4781ab4a0962","zone_type":"crucible","addresses":["fd00:1122:3344:109::7"],"dataset":{"id":"b63c6882-ca90-4156-b561-4781ab4a0962","name":{"pool_name":"oxp_b0e1a261-b932-47c4-81e9-1977275ae9d9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::7]:32345"},"services":[{"id":"b63c6882-ca90-4156-b561-4781ab4a0962","details":{"type":"crucible","address":"[fd00:1122:3344:109::7]:32345"}}]},"root":"/pool/ext/d5b07362-64db-4b18-a3e9-8d7cbabae2d5/crypt/zone"},{"zone":{"id":"f71344eb-f7e2-439d-82a0-9941e6868fb6","zone_type":"crucible","addresses":["fd00:1122:3344:109::9"],"dataset":{"id":"f71344eb-f7e2-439d-82a0-9941e6868fb6","name":{"pool_name":"oxp_027a82e8-da
a3-4fa6-8205-ed03445e1086","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::9]:32345"},"services":[{"id":"f71344eb-f7e2-439d-82a0-9941e6868fb6","details":{"type":"crucible","address":"[fd00:1122:3344:109::9]:32345"}}]},"root":"/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone"},{"zone":{"id":"a60cf0d7-12d5-43cb-aa3f-7a9e84de08fb","zone_type":"crucible","addresses":["fd00:1122:3344:109::a"],"dataset":{"id":"a60cf0d7-12d5-43cb-aa3f-7a9e84de08fb","name":{"pool_name":"oxp_8736aaf9-4d72-42b1-8e4f-07644d999c8b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::a]:32345"},"services":[{"id":"a60cf0d7-12d5-43cb-aa3f-7a9e84de08fb","details":{"type":"crucible","address":"[fd00:1122:3344:109::a]:32345"}}]},"root":"/pool/ext/8736aaf9-4d72-42b1-8e4f-07644d999c8b/crypt/zone"},{"zone":{"id":"5d0e03b2-8958-4c43-8851-bf819f102958","zone_type":"crucible","addresses":["fd00:1122:3344:109::8"],"dataset":{"id":"5d0e03b2-8958-4c43-8851-bf819f102958","name":{"pool_name":"oxp_62426615-7832-49e7-9426-e39ffeb42c69","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::8]:32345"},"services":[{"id":"5d0e03b2-8958-4c43-8851-bf819f102958","details":{"type":"crucible","address":"[fd00:1122:3344:109::8]:32345"}}]},"root":"/pool/ext/07fc8ec9-1216-4d98-be34-c2970b585e61/crypt/zone"},{"zone":{"id":"accc05a2-ec80-4856-a825-ec6b7f700eaa","zone_type":"crucible","addresses":["fd00:1122:3344:109::d"],"dataset":{"id":"accc05a2-ec80-4856-a825-ec6b7f700eaa","name":{"pool_name":"oxp_dc083c53-7014-4482-8a79-f338ba2b0fb4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::d]:32345"},"services":[{"id":"accc05a2-ec80-4856-a825-ec6b7f700eaa","details":{"type":"crucible","address":"[fd00:1122:3344:109::d]:32345"}}]},"root":"/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone"},{"zone":{"id":"2e32fdcc-737a-4430-8290-cb7028ea4d50","zone_type":"crucible","addresses":["fd00:1122:3344:109::b"],"dataset":{"id":"2e32fdcc-737a-4430-8290-cb7028ea4d50","name":{"pool_name":"oxp_3cafbb47-c194-4a42-99ff-34dfeab999ed","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::b]:32345"},"services":[{"id":"2e32fdcc-737a-4430-8290-cb7028ea4d50","details":{"type":"crucible","address":"[fd00:1122:3344:109::b]:32345"}}]},"root":"/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone"},{"zone":{"id":"a97c6ae2-37f6-4d93-a66e-cb5cd3c6aaa2","zone_type":"crucible","addresses":["fd00:1122:3344:109::c"],"dataset":{"id":"a97c6ae2-37f6-4d93-a66e-cb5cd3c6aaa2","name":{"pool_name":"oxp_07fc8ec9-1216-4d98-be34-c2970b585e61","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::c]:32345"},"services":[{"id":"a97c6ae2-37f6-4d93-a66e-cb5cd3c6aaa2","details":{"type":"crucible","address":"[fd00:1122:3344:109::c]:32345"}}]},"root":"/pool/ext/07fc8ec9-1216-4d98-be34-c2970b585e61/crypt/zone"},{"zone":{"id":"3237a532-acaa-4ebe-bf11-dde794fea739","zone_type":"cockroach_db","addresses":["fd00:1122:3344:109::3"],"dataset":{"id":"3237a532-acaa-4ebe-bf11-dde794fea739","name":{"pool_name":"oxp_ae56280b-17ce-4266-8573-e1da9db6c6bb","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:109::3]:32221"},"services":[{"id":"3237a532-acaa-4ebe-bf11-dde794fea739","details":{"type":"cockroach_db","address":"[fd00:1122:3344:109::3]:32221"}}]},"root":"/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone"},{"zone":{"id":"83257100-5590-484a-b72a-a079389d8da6","zone_type":"ntp","addresses":["fd00:1122:3344:109::e"],"dataset":null,"services":[{"id":"83257100-5590-484a-b72a-a079
389d8da6","details":{"type":"internal_ntp","address":"[fd00:1122:3344:109::e]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/3cafbb47-c194-4a42-99ff-34dfeab999ed/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled21.json b/sled-agent/tests/old-service-ledgers/rack2-sled21.json new file mode 100644 index 0000000000..78e003f79e --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled21.json @@ -0,0 +1 @@ +{"generation":5,"requests":[{"zone":{"id":"0437b69d-73a8-4231-86f9-6b5556e7e7ef","zone_type":"crucible","addresses":["fd00:1122:3344:102::5"],"dataset":{"id":"0437b69d-73a8-4231-86f9-6b5556e7e7ef","name":{"pool_name":"oxp_aa0ffe35-76db-42ab-adf2-ceb072bdf811","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::5]:32345"},"services":[{"id":"0437b69d-73a8-4231-86f9-6b5556e7e7ef","details":{"type":"crucible","address":"[fd00:1122:3344:102::5]:32345"}}]},"root":"/pool/ext/0d2805da-6d24-4e57-a700-0c3865c05544/crypt/zone"},{"zone":{"id":"47234ca5-305f-436a-9e9a-36bca9667680","zone_type":"crucible","addresses":["fd00:1122:3344:102::b"],"dataset":{"id":"47234ca5-305f-436a-9e9a-36bca9667680","name":{"pool_name":"oxp_0d2805da-6d24-4e57-a700-0c3865c05544","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::b]:32345"},"services":[{"id":"47234ca5-305f-436a-9e9a-36bca9667680","details":{"type":"crucible","address":"[fd00:1122:3344:102::b]:32345"}}]},"root":"/pool/ext/160691d8-33a1-4d7d-a48a-c3fd27d76822/crypt/zone"},{"zone":{"id":"2898657e-4141-4c05-851b-147bffc6bbbd","zone_type":"nexus","addresses":["fd00:1122:3344:102::3"],"dataset":null,"services":[{"id":"2898657e-4141-4c05-851b-147bffc6bbbd","details":{"type":"nexus","internal_address":"[fd00:1122:3344:102::3]:12221","external_ip":"172.20.26.5","nic":{"id":"2e9a412e-c79a-48fe-8fa4-f5a6afed1040","kind":{"type":"service","id":"2898657e-4141-4c05-851b-147bffc6bbbd"},"name":"nexus-2898657e-4141-4c05-851b-147bffc6bbbd","ip":"172.30.2.7","mac":"A8:40:25:FF:C6:59","subnet":"172.30.2.0/24","vni":100,"primary":true,"slot":0},"external_tls":true,"external_dns_servers":["1.1.1.1","9.9.9.9"]}}]},"root":"/pool/ext/c0b4ecc1-a145-443f-90d1-2e8136b007bc/crypt/zone"},{"zone":{"id":"cf98c4d6-4a7b-49c0-9b14-48a8adf52ce9","zone_type":"crucible","addresses":["fd00:1122:3344:102::c"],"dataset":{"id":"cf98c4d6-4a7b-49c0-9b14-48a8adf52ce9","name":{"pool_name":"oxp_c0b4ecc1-a145-443f-90d1-2e8136b007bc","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::c]:32345"},"services":[{"id":"cf98c4d6-4a7b-49c0-9b14-48a8adf52ce9","details":{"type":"crucible","address":"[fd00:1122:3344:102::c]:32345"}}]},"root":"/pool/ext/f6acd70a-d6cb-464d-a460-dd5c60301562/crypt/zone"},{"zone":{"id":"13c1e91e-bfcc-4eea-8185-412fc37fdea3","zone_type":"crucible","addresses":["fd00:1122:3344:102::9"],"dataset":{"id":"13c1e91e-bfcc-4eea-8185-412fc37fdea3","name":{"pool_name":"oxp_e9b0a2e4-8060-41bd-a3b5-d0642246d06d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::9]:32345"},"services":[{"id":"13c1e91e-bfcc-4eea-8185-412fc37fdea3","details":{"type":"crucible","address":"[fd00:1122:3344:102::9]:32345"}}]},"root":"/pool/ext/c0b4ecc1-a145-443f-90d1-2e8136b007bc/crypt/zone"},{"zone":{"id":"c9cb60af-9e0e-4b3b-b971-53138a9b8d27","zone_type":"crucibl
e","addresses":["fd00:1122:3344:102::4"],"dataset":{"id":"c9cb60af-9e0e-4b3b-b971-53138a9b8d27","name":{"pool_name":"oxp_77749ec7-39a9-489d-904b-87f7223c4e3c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::4]:32345"},"services":[{"id":"c9cb60af-9e0e-4b3b-b971-53138a9b8d27","details":{"type":"crucible","address":"[fd00:1122:3344:102::4]:32345"}}]},"root":"/pool/ext/77749ec7-39a9-489d-904b-87f7223c4e3c/crypt/zone"},{"zone":{"id":"32995cfa-47ec-4b84-8514-7c1c8a86c19d","zone_type":"crucible","addresses":["fd00:1122:3344:102::8"],"dataset":{"id":"32995cfa-47ec-4b84-8514-7c1c8a86c19d","name":{"pool_name":"oxp_eac83f81-eb51-4f3e-874e-82f55dd952ba","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::8]:32345"},"services":[{"id":"32995cfa-47ec-4b84-8514-7c1c8a86c19d","details":{"type":"crucible","address":"[fd00:1122:3344:102::8]:32345"}}]},"root":"/pool/ext/0d2805da-6d24-4e57-a700-0c3865c05544/crypt/zone"},{"zone":{"id":"b93d2e2d-d54b-4503-85c3-9878e3cee9c7","zone_type":"crucible","addresses":["fd00:1122:3344:102::a"],"dataset":{"id":"b93d2e2d-d54b-4503-85c3-9878e3cee9c7","name":{"pool_name":"oxp_160691d8-33a1-4d7d-a48a-c3fd27d76822","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::a]:32345"},"services":[{"id":"b93d2e2d-d54b-4503-85c3-9878e3cee9c7","details":{"type":"crucible","address":"[fd00:1122:3344:102::a]:32345"}}]},"root":"/pool/ext/138663ad-a382-4595-baf0-08f6b0276a67/crypt/zone"},{"zone":{"id":"2ebbac4f-7b0f-43eb-99fd-dd6ff7f9e097","zone_type":"crucible","addresses":["fd00:1122:3344:102::6"],"dataset":{"id":"2ebbac4f-7b0f-43eb-99fd-dd6ff7f9e097","name":{"pool_name":"oxp_138663ad-a382-4595-baf0-08f6b0276a67","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::6]:32345"},"services":[{"id":"2ebbac4f-7b0f-43eb-99fd-dd6ff7f9e097","details":{"type":"crucible","address":"[fd00:1122:3344:102::6]:32345"}}]},"root":"/pool/ext/e9b0a2e4-8060-41bd-a3b5-d0642246d06d/crypt/zone"},{"zone":{"id":"d0eea3b2-e5ac-42bf-97b7-531b78fa06d1","zone_type":"crucible","addresses":["fd00:1122:3344:102::7"],"dataset":{"id":"d0eea3b2-e5ac-42bf-97b7-531b78fa06d1","name":{"pool_name":"oxp_69f0b863-f73f-42b2-9822-b2cb99f09003","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::7]:32345"},"services":[{"id":"d0eea3b2-e5ac-42bf-97b7-531b78fa06d1","details":{"type":"crucible","address":"[fd00:1122:3344:102::7]:32345"}}]},"root":"/pool/ext/138663ad-a382-4595-baf0-08f6b0276a67/crypt/zone"},{"zone":{"id":"2b34cd1d-ea7d-41a1-82b9-75550fdf6eb0","zone_type":"crucible","addresses":["fd00:1122:3344:102::d"],"dataset":{"id":"2b34cd1d-ea7d-41a1-82b9-75550fdf6eb0","name":{"pool_name":"oxp_f6acd70a-d6cb-464d-a460-dd5c60301562","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::d]:32345"},"services":[{"id":"2b34cd1d-ea7d-41a1-82b9-75550fdf6eb0","details":{"type":"crucible","address":"[fd00:1122:3344:102::d]:32345"}}]},"root":"/pool/ext/c0b4ecc1-a145-443f-90d1-2e8136b007bc/crypt/zone"},{"zone":{"id":"6ea2684c-115e-48a6-8453-ab52d1cecd73","zone_type":"ntp","addresses":["fd00:1122:3344:102::e"],"dataset":null,"services":[{"id":"6ea2684c-115e-48a6-8453-ab52d1cecd73","details":{"type":"boundary_ntp","address":"[fd00:1122:3344:102::e]:123","ntp_servers":["ntp.eng.oxide.computer"],"dns_servers":["1.1.1.1","9.9.9.9"],"domain":null,"nic":{"id":"4effd079-ed4e-4cf6-8545-bb9574f516d2","kind":{"type":"service","id":"6ea2684c-115e-48a6-8453-ab52d1cecd73"},"name":"ntp-6ea2684c-115e-48a6-8453-ab52d1cecd73","ip":"172.30.3.6","mac":"A8:40:25:FF:A0:F9","s
ubnet":"172.30.3.0/24","vni":100,"primary":true,"slot":0},"snat_cfg":{"ip":"172.20.26.7","first_port":16384,"last_port":32767}}}]},"root":"/pool/ext/aa0ffe35-76db-42ab-adf2-ceb072bdf811/crypt/zone"},{"zone":{"id":"3a1ea15f-06a4-4afd-959a-c3a00b2bdd80","zone_type":"internal_dns","addresses":["fd00:1122:3344:2::1"],"dataset":{"id":"3a1ea15f-06a4-4afd-959a-c3a00b2bdd80","name":{"pool_name":"oxp_77749ec7-39a9-489d-904b-87f7223c4e3c","kind":{"type":"internal_dns"}},"service_address":"[fd00:1122:3344:2::1]:5353"},"services":[{"id":"3a1ea15f-06a4-4afd-959a-c3a00b2bdd80","details":{"type":"internal_dns","http_address":"[fd00:1122:3344:2::1]:5353","dns_address":"[fd00:1122:3344:2::1]:53","gz_address":"fd00:1122:3344:2::2","gz_address_index":1}}]},"root":"/pool/ext/69f0b863-f73f-42b2-9822-b2cb99f09003/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled23.json b/sled-agent/tests/old-service-ledgers/rack2-sled23.json new file mode 100644 index 0000000000..29b8c455d3 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled23.json @@ -0,0 +1 @@ +{"generation":5,"requests":[{"zone":{"id":"1876cdcf-b2e7-4b79-ad2e-67df716e1860","zone_type":"crucible","addresses":["fd00:1122:3344:10a::8"],"dataset":{"id":"1876cdcf-b2e7-4b79-ad2e-67df716e1860","name":{"pool_name":"oxp_d4c6bdc6-5e99-4f6c-b57a-9bfcb9a76be4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::8]:32345"},"services":[{"id":"1876cdcf-b2e7-4b79-ad2e-67df716e1860","details":{"type":"crucible","address":"[fd00:1122:3344:10a::8]:32345"}}]},"root":"/pool/ext/86c58ea3-1413-4af3-9aff-9c0a3d758459/crypt/zone"},{"zone":{"id":"0e708ee3-b7a6-4993-a88a-4489add33e29","zone_type":"crucible","addresses":["fd00:1122:3344:10a::d"],"dataset":{"id":"0e708ee3-b7a6-4993-a88a-4489add33e29","name":{"pool_name":"oxp_718ad834-b415-4abb-934d-9f987cde0a96","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::d]:32345"},"services":[{"id":"0e708ee3-b7a6-4993-a88a-4489add33e29","details":{"type":"crucible","address":"[fd00:1122:3344:10a::d]:32345"}}]},"root":"/pool/ext/30f7d236-c835-46cc-bc27-9099a6826f67/crypt/zone"},{"zone":{"id":"4e1b9a65-848f-4649-b360-1df0d135b44d","zone_type":"crucible","addresses":["fd00:1122:3344:10a::c"],"dataset":{"id":"4e1b9a65-848f-4649-b360-1df0d135b44d","name":{"pool_name":"oxp_88ee08c6-1c0f-44c2-9110-b8d5a7589ebb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::c]:32345"},"services":[{"id":"4e1b9a65-848f-4649-b360-1df0d135b44d","details":{"type":"crucible","address":"[fd00:1122:3344:10a::c]:32345"}}]},"root":"/pool/ext/30f7d236-c835-46cc-bc27-9099a6826f67/crypt/zone"},{"zone":{"id":"da510a57-3af1-4d2b-b2ed-2e8849f27d8b","zone_type":"oximeter","addresses":["fd00:1122:3344:10a::3"],"dataset":null,"services":[{"id":"da510a57-3af1-4d2b-b2ed-2e8849f27d8b","details":{"type":"oximeter","address":"[fd00:1122:3344:10a::3]:12223"}}]},"root":"/pool/ext/718ad834-b415-4abb-934d-9f987cde0a96/crypt/zone"},{"zone":{"id":"d4d9acc8-3e0b-4fab-a0a2-d21920fabd7e","zone_type":"crucible","addresses":["fd00:1122:3344:10a::6"],"dataset":{"id":"d4d9acc8-3e0b-4fab-a0a2-d21920fabd7e","name":{"pool_name":"oxp_9dfe424f-cba6-4bfb-a3dd-e8bd7fdea57d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::6]:32345"},"services":[{"id":"d4d9acc8-3e0b-4fab-a0a2-d21920fabd7e","details":{"type":"crucible","address":"[fd00:1122:3344:10a::6]:32345"}}]},"root":"/pool/ext/30f7d236-c835-46cc-bc27-9099a6826f67/crypt/zone"},{"zone":{"id":"fcb75972-836b-4f55-ba21-9722
832cf5c2","zone_type":"crucible","addresses":["fd00:1122:3344:10a::7"],"dataset":{"id":"fcb75972-836b-4f55-ba21-9722832cf5c2","name":{"pool_name":"oxp_9005671f-3d90-4ed1-be15-ad65b9a65bd5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::7]:32345"},"services":[{"id":"fcb75972-836b-4f55-ba21-9722832cf5c2","details":{"type":"crucible","address":"[fd00:1122:3344:10a::7]:32345"}}]},"root":"/pool/ext/d4c6bdc6-5e99-4f6c-b57a-9bfcb9a76be4/crypt/zone"},{"zone":{"id":"624beba0-7dcd-4d55-af05-4670c6fcb1fb","zone_type":"crucible","addresses":["fd00:1122:3344:10a::4"],"dataset":{"id":"624beba0-7dcd-4d55-af05-4670c6fcb1fb","name":{"pool_name":"oxp_93867156-a43d-4c03-a899-1535e566c8bd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::4]:32345"},"services":[{"id":"624beba0-7dcd-4d55-af05-4670c6fcb1fb","details":{"type":"crucible","address":"[fd00:1122:3344:10a::4]:32345"}}]},"root":"/pool/ext/93867156-a43d-4c03-a899-1535e566c8bd/crypt/zone"},{"zone":{"id":"26fb3830-898e-4086-afaf-8f9654716b8c","zone_type":"crucible","addresses":["fd00:1122:3344:10a::b"],"dataset":{"id":"26fb3830-898e-4086-afaf-8f9654716b8c","name":{"pool_name":"oxp_86c58ea3-1413-4af3-9aff-9c0a3d758459","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::b]:32345"},"services":[{"id":"26fb3830-898e-4086-afaf-8f9654716b8c","details":{"type":"crucible","address":"[fd00:1122:3344:10a::b]:32345"}}]},"root":"/pool/ext/93867156-a43d-4c03-a899-1535e566c8bd/crypt/zone"},{"zone":{"id":"a3ef7eba-c08e-48ef-ae7a-89e2fcb49b66","zone_type":"crucible","addresses":["fd00:1122:3344:10a::a"],"dataset":{"id":"a3ef7eba-c08e-48ef-ae7a-89e2fcb49b66","name":{"pool_name":"oxp_cd3fdbae-a9d9-4db7-866a-bca36f6dd634","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::a]:32345"},"services":[{"id":"a3ef7eba-c08e-48ef-ae7a-89e2fcb49b66","details":{"type":"crucible","address":"[fd00:1122:3344:10a::a]:32345"}}]},"root":"/pool/ext/718ad834-b415-4abb-934d-9f987cde0a96/crypt/zone"},{"zone":{"id":"5c1d4a02-f33b-433a-81f5-5c149e3433bd","zone_type":"crucible","addresses":["fd00:1122:3344:10a::5"],"dataset":{"id":"5c1d4a02-f33b-433a-81f5-5c149e3433bd","name":{"pool_name":"oxp_9adfc865-2eef-4880-a6e3-9d2f88c8efd0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::5]:32345"},"services":[{"id":"5c1d4a02-f33b-433a-81f5-5c149e3433bd","details":{"type":"crucible","address":"[fd00:1122:3344:10a::5]:32345"}}]},"root":"/pool/ext/cd3fdbae-a9d9-4db7-866a-bca36f6dd634/crypt/zone"},{"zone":{"id":"ee77efe9-81d0-4395-a237-15e30c2c2d04","zone_type":"crucible","addresses":["fd00:1122:3344:10a::9"],"dataset":{"id":"ee77efe9-81d0-4395-a237-15e30c2c2d04","name":{"pool_name":"oxp_30f7d236-c835-46cc-bc27-9099a6826f67","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::9]:32345"},"services":[{"id":"ee77efe9-81d0-4395-a237-15e30c2c2d04","details":{"type":"crucible","address":"[fd00:1122:3344:10a::9]:32345"}}]},"root":"/pool/ext/88ee08c6-1c0f-44c2-9110-b8d5a7589ebb/crypt/zone"},{"zone":{"id":"71ab91b7-48d4-4d31-b47e-59f29f419116","zone_type":"ntp","addresses":["fd00:1122:3344:10a::e"],"dataset":null,"services":[{"id":"71ab91b7-48d4-4d31-b47e-59f29f419116","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10a::e]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ex
t/cd3fdbae-a9d9-4db7-866a-bca36f6dd634/crypt/zone"},{"zone":{"id":"46ccd8fe-4e3c-4307-97ae-1f7ac505082a","zone_type":"internal_dns","addresses":["fd00:1122:3344:3::1"],"dataset":{"id":"46ccd8fe-4e3c-4307-97ae-1f7ac505082a","name":{"pool_name":"oxp_93867156-a43d-4c03-a899-1535e566c8bd","kind":{"type":"internal_dns"}},"service_address":"[fd00:1122:3344:3::1]:5353"},"services":[{"id":"46ccd8fe-4e3c-4307-97ae-1f7ac505082a","details":{"type":"internal_dns","http_address":"[fd00:1122:3344:3::1]:5353","dns_address":"[fd00:1122:3344:3::1]:53","gz_address":"fd00:1122:3344:3::2","gz_address_index":2}}]},"root":"/pool/ext/9dfe424f-cba6-4bfb-a3dd-e8bd7fdea57d/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled25.json b/sled-agent/tests/old-service-ledgers/rack2-sled25.json new file mode 100644 index 0000000000..e48ef68faa --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled25.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"180d466d-eb36-4546-8922-e52c4c076823","zone_type":"crucible","addresses":["fd00:1122:3344:101::5"],"dataset":{"id":"180d466d-eb36-4546-8922-e52c4c076823","name":{"pool_name":"oxp_ac789935-fa42-4d00-8967-df0d96dbb74e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::5]:32345"},"services":[{"id":"180d466d-eb36-4546-8922-e52c4c076823","details":{"type":"crucible","address":"[fd00:1122:3344:101::5]:32345"}}]},"root":"/pool/ext/d732addc-cfe8-4c2c-8028-72eb4481b04e/crypt/zone"},{"zone":{"id":"b5af0303-bc03-40a3-b733-0396d705dfbf","zone_type":"crucible","addresses":["fd00:1122:3344:101::7"],"dataset":{"id":"b5af0303-bc03-40a3-b733-0396d705dfbf","name":{"pool_name":"oxp_d732addc-cfe8-4c2c-8028-72eb4481b04e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::7]:32345"},"services":[{"id":"b5af0303-bc03-40a3-b733-0396d705dfbf","details":{"type":"crucible","address":"[fd00:1122:3344:101::7]:32345"}}]},"root":"/pool/ext/677b0057-3a80-461b-aca8-c2cb501a7278/crypt/zone"},{"zone":{"id":"9c7c805a-f5ed-4e48-86e3-7aa81a718881","zone_type":"crucible","addresses":["fd00:1122:3344:101::c"],"dataset":{"id":"9c7c805a-f5ed-4e48-86e3-7aa81a718881","name":{"pool_name":"oxp_923c930c-80f8-448d-8321-cebfc6c41760","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::c]:32345"},"services":[{"id":"9c7c805a-f5ed-4e48-86e3-7aa81a718881","details":{"type":"crucible","address":"[fd00:1122:3344:101::c]:32345"}}]},"root":"/pool/ext/ac789935-fa42-4d00-8967-df0d96dbb74e/crypt/zone"},{"zone":{"id":"4e49c83c-2d4a-491a-91ac-4ab022026dcf","zone_type":"crucible","addresses":["fd00:1122:3344:101::4"],"dataset":{"id":"4e49c83c-2d4a-491a-91ac-4ab022026dcf","name":{"pool_name":"oxp_c99e6032-1d4f-47d2-9efe-ae2b2479554e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::4]:32345"},"services":[{"id":"4e49c83c-2d4a-491a-91ac-4ab022026dcf","details":{"type":"crucible","address":"[fd00:1122:3344:101::4]:32345"}}]},"root":"/pool/ext/653065d2-ab70-47c9-b832-34238fdc95ef/crypt/zone"},{"zone":{"id":"0e38475e-b8b2-4813-bf80-3c170081081a","zone_type":"crucible","addresses":["fd00:1122:3344:101::d"],"dataset":{"id":"0e38475e-b8b2-4813-bf80-3c170081081a","name":{"pool_name":"oxp_653065d2-ab70-47c9-b832-34238fdc95ef","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::d]:32345"},"services":[{"id":"0e38475e-b8b2-4813-bf80-3c170081081a","details":{"type":"crucible","address":"[fd00:1122:3344:101::d]:32345"}}]},"root":"/pool/ext/4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca/crypt/zone"},{"zone":
{"id":"75123e60-1116-4b8d-a466-7302220127da","zone_type":"crucible","addresses":["fd00:1122:3344:101::8"],"dataset":{"id":"75123e60-1116-4b8d-a466-7302220127da","name":{"pool_name":"oxp_c764a8ae-6862-4eec-9db0-cc6ea478e4a7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::8]:32345"},"services":[{"id":"75123e60-1116-4b8d-a466-7302220127da","details":{"type":"crucible","address":"[fd00:1122:3344:101::8]:32345"}}]},"root":"/pool/ext/c764a8ae-6862-4eec-9db0-cc6ea478e4a7/crypt/zone"},{"zone":{"id":"fbd0379c-97fa-49ea-8980-17ae30ffff3c","zone_type":"crucible","addresses":["fd00:1122:3344:101::b"],"dataset":{"id":"fbd0379c-97fa-49ea-8980-17ae30ffff3c","name":{"pool_name":"oxp_fcb0e4c7-e046-4cf5-ad35-3ad90e1eb90c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::b]:32345"},"services":[{"id":"fbd0379c-97fa-49ea-8980-17ae30ffff3c","details":{"type":"crucible","address":"[fd00:1122:3344:101::b]:32345"}}]},"root":"/pool/ext/4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca/crypt/zone"},{"zone":{"id":"ec635326-cd1d-4f73-b8e6-c3a36a7020db","zone_type":"crucible","addresses":["fd00:1122:3344:101::a"],"dataset":{"id":"ec635326-cd1d-4f73-b8e6-c3a36a7020db","name":{"pool_name":"oxp_6bfb4120-488d-4f3d-90ef-e9bfa523b388","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::a]:32345"},"services":[{"id":"ec635326-cd1d-4f73-b8e6-c3a36a7020db","details":{"type":"crucible","address":"[fd00:1122:3344:101::a]:32345"}}]},"root":"/pool/ext/c99e6032-1d4f-47d2-9efe-ae2b2479554e/crypt/zone"},{"zone":{"id":"f500d564-c40a-4eca-ac8a-a26b435f2037","zone_type":"external_dns","addresses":["fd00:1122:3344:101::3"],"dataset":{"id":"f500d564-c40a-4eca-ac8a-a26b435f2037","name":{"pool_name":"oxp_c99e6032-1d4f-47d2-9efe-ae2b2479554e","kind":{"type":"external_dns"}},"service_address":"[fd00:1122:3344:101::3]:5353"},"services":[{"id":"f500d564-c40a-4eca-ac8a-a26b435f2037","details":{"type":"external_dns","http_address":"[fd00:1122:3344:101::3]:5353","dns_address":"172.20.26.2:53","nic":{"id":"b0b42776-3914-4a69-889f-4831dc72327c","kind":{"type":"service","id":"f500d564-c40a-4eca-ac8a-a26b435f2037"},"name":"external-dns-f500d564-c40a-4eca-ac8a-a26b435f2037","ip":"172.30.1.6","mac":"A8:40:25:FF:D0:B4","subnet":"172.30.1.0/24","vni":100,"primary":true,"slot":0}}}]},"root":"/pool/ext/ac789935-fa42-4d00-8967-df0d96dbb74e/crypt/zone"},{"zone":{"id":"56d4dbcc-3b4a-4ed0-8795-7734aadcc4c0","zone_type":"crucible","addresses":["fd00:1122:3344:101::9"],"dataset":{"id":"56d4dbcc-3b4a-4ed0-8795-7734aadcc4c0","name":{"pool_name":"oxp_4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::9]:32345"},"services":[{"id":"56d4dbcc-3b4a-4ed0-8795-7734aadcc4c0","details":{"type":"crucible","address":"[fd00:1122:3344:101::9]:32345"}}]},"root":"/pool/ext/4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca/crypt/zone"},{"zone":{"id":"0d3a1bd5-f6fe-49cb-807a-190dabc90103","zone_type":"crucible","addresses":["fd00:1122:3344:101::6"],"dataset":{"id":"0d3a1bd5-f6fe-49cb-807a-190dabc90103","name":{"pool_name":"oxp_677b0057-3a80-461b-aca8-c2cb501a7278","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::6]:32345"},"services":[{"id":"0d3a1bd5-f6fe-49cb-807a-190dabc90103","details":{"type":"crucible","address":"[fd00:1122:3344:101::6]:32345"}}]},"root":"/pool/ext/6bfb4120-488d-4f3d-90ef-e9bfa523b388/crypt/zone"},{"zone":{"id":"d34c7184-5d4e-4cb5-8f91-df74a343ffbc","zone_type":"ntp","addresses":["fd00:1122:3344:101::e"],"dataset":null,"services":[{"id":"d34c7184-5d4e-4cb5-8f91-
df74a343ffbc","details":{"type":"internal_ntp","address":"[fd00:1122:3344:101::e]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/ac789935-fa42-4d00-8967-df0d96dbb74e/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled8.json b/sled-agent/tests/old-service-ledgers/rack2-sled8.json new file mode 100644 index 0000000000..7d52980d9f --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled8.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"7153983f-8fd7-4fb9-92ac-0f07a07798b4","zone_type":"crucible","addresses":["fd00:1122:3344:103::a"],"dataset":{"id":"7153983f-8fd7-4fb9-92ac-0f07a07798b4","name":{"pool_name":"oxp_bf428719-1b16-4503-99f4-ad95846d916f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::a]:32345"},"services":[{"id":"7153983f-8fd7-4fb9-92ac-0f07a07798b4","details":{"type":"crucible","address":"[fd00:1122:3344:103::a]:32345"}}]},"root":"/pool/ext/26e698bb-006d-4208-94b9-d1bc279111fa/crypt/zone"},{"zone":{"id":"7d44ba36-4a69-490a-bc40-f6f90a4208d4","zone_type":"crucible","addresses":["fd00:1122:3344:103::c"],"dataset":{"id":"7d44ba36-4a69-490a-bc40-f6f90a4208d4","name":{"pool_name":"oxp_414e235b-55c3-4dc1-a568-8adf4ea1a052","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::c]:32345"},"services":[{"id":"7d44ba36-4a69-490a-bc40-f6f90a4208d4","details":{"type":"crucible","address":"[fd00:1122:3344:103::c]:32345"}}]},"root":"/pool/ext/cf940e15-dbc5-481b-866a-4de4b018898e/crypt/zone"},{"zone":{"id":"65a11c18-7f59-41ac-b9e7-680627f996e7","zone_type":"nexus","addresses":["fd00:1122:3344:103::3"],"dataset":null,"services":[{"id":"65a11c18-7f59-41ac-b9e7-680627f996e7","details":{"type":"nexus","internal_address":"[fd00:1122:3344:103::3]:12221","external_ip":"172.20.26.3","nic":{"id":"a3e13dde-a2bc-4170-ad84-aad8085b6034","kind":{"type":"service","id":"65a11c18-7f59-41ac-b9e7-680627f996e7"},"name":"nexus-65a11c18-7f59-41ac-b9e7-680627f996e7","ip":"172.30.2.5","mac":"A8:40:25:FF:A6:83","subnet":"172.30.2.0/24","vni":100,"primary":true,"slot":0},"external_tls":true,"external_dns_servers":["1.1.1.1","9.9.9.9"]}}]},"root":"/pool/ext/e126ddcc-8bee-46ba-8199-2a74df0ba040/crypt/zone"},{"zone":{"id":"072fdae8-2adf-4fd2-94ce-e9b0663b91e7","zone_type":"crucible","addresses":["fd00:1122:3344:103::b"],"dataset":{"id":"072fdae8-2adf-4fd2-94ce-e9b0663b91e7","name":{"pool_name":"oxp_26e698bb-006d-4208-94b9-d1bc279111fa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::b]:32345"},"services":[{"id":"072fdae8-2adf-4fd2-94ce-e9b0663b91e7","details":{"type":"crucible","address":"[fd00:1122:3344:103::b]:32345"}}]},"root":"/pool/ext/bf428719-1b16-4503-99f4-ad95846d916f/crypt/zone"},{"zone":{"id":"01f93020-7e7d-4185-93fb-6ca234056c82","zone_type":"crucible","addresses":["fd00:1122:3344:103::5"],"dataset":{"id":"01f93020-7e7d-4185-93fb-6ca234056c82","name":{"pool_name":"oxp_7b24095a-72df-45e3-984f-2b795e052ac7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::5]:32345"},"services":[{"id":"01f93020-7e7d-4185-93fb-6ca234056c82","details":{"type":"crucible","address":"[fd00:1122:3344:103::5]:32345"}}]},"root":"/pool/ext/7b24095a-72df-45e3-984f-2b795e052ac7/crypt/zone"},{"zone":{"id":"e238116d-e5cc-43d4-9c8a-6f138ae8a15d","zone_type":"crucib
le","addresses":["fd00:1122:3344:103::6"],"dataset":{"id":"e238116d-e5cc-43d4-9c8a-6f138ae8a15d","name":{"pool_name":"oxp_e126ddcc-8bee-46ba-8199-2a74df0ba040","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::6]:32345"},"services":[{"id":"e238116d-e5cc-43d4-9c8a-6f138ae8a15d","details":{"type":"crucible","address":"[fd00:1122:3344:103::6]:32345"}}]},"root":"/pool/ext/7b24095a-72df-45e3-984f-2b795e052ac7/crypt/zone"},{"zone":{"id":"585cd8c5-c41e-4be4-beb8-bfbef9b53856","zone_type":"crucible","addresses":["fd00:1122:3344:103::7"],"dataset":{"id":"585cd8c5-c41e-4be4-beb8-bfbef9b53856","name":{"pool_name":"oxp_6340805e-c5af-418d-8bd1-fc0085667f33","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::7]:32345"},"services":[{"id":"585cd8c5-c41e-4be4-beb8-bfbef9b53856","details":{"type":"crucible","address":"[fd00:1122:3344:103::7]:32345"}}]},"root":"/pool/ext/414e235b-55c3-4dc1-a568-8adf4ea1a052/crypt/zone"},{"zone":{"id":"0b41c560-3b20-42f4-82ad-92f5bb575d6b","zone_type":"crucible","addresses":["fd00:1122:3344:103::9"],"dataset":{"id":"0b41c560-3b20-42f4-82ad-92f5bb575d6b","name":{"pool_name":"oxp_b93f880e-c55b-4d6c-9a16-939d84b628fc","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::9]:32345"},"services":[{"id":"0b41c560-3b20-42f4-82ad-92f5bb575d6b","details":{"type":"crucible","address":"[fd00:1122:3344:103::9]:32345"}}]},"root":"/pool/ext/6340805e-c5af-418d-8bd1-fc0085667f33/crypt/zone"},{"zone":{"id":"0ccf27c0-e32d-4b52-a2c5-6db0c64a26f9","zone_type":"crucible","addresses":["fd00:1122:3344:103::d"],"dataset":{"id":"0ccf27c0-e32d-4b52-a2c5-6db0c64a26f9","name":{"pool_name":"oxp_2115b084-be0f-4fba-941b-33a659798a9e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::d]:32345"},"services":[{"id":"0ccf27c0-e32d-4b52-a2c5-6db0c64a26f9","details":{"type":"crucible","address":"[fd00:1122:3344:103::d]:32345"}}]},"root":"/pool/ext/414e235b-55c3-4dc1-a568-8adf4ea1a052/crypt/zone"},{"zone":{"id":"a6ba8273-0320-4dab-b801-281f041b0c50","zone_type":"crucible","addresses":["fd00:1122:3344:103::4"],"dataset":{"id":"a6ba8273-0320-4dab-b801-281f041b0c50","name":{"pool_name":"oxp_8a199f12-4f5c-483a-8aca-f97856658a35","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::4]:32345"},"services":[{"id":"a6ba8273-0320-4dab-b801-281f041b0c50","details":{"type":"crucible","address":"[fd00:1122:3344:103::4]:32345"}}]},"root":"/pool/ext/b93f880e-c55b-4d6c-9a16-939d84b628fc/crypt/zone"},{"zone":{"id":"b9b7b4c2-284a-4ec1-80ea-75b7a43b71c4","zone_type":"crucible","addresses":["fd00:1122:3344:103::8"],"dataset":{"id":"b9b7b4c2-284a-4ec1-80ea-75b7a43b71c4","name":{"pool_name":"oxp_cf940e15-dbc5-481b-866a-4de4b018898e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::8]:32345"},"services":[{"id":"b9b7b4c2-284a-4ec1-80ea-75b7a43b71c4","details":{"type":"crucible","address":"[fd00:1122:3344:103::8]:32345"}}]},"root":"/pool/ext/cf940e15-dbc5-481b-866a-4de4b018898e/crypt/zone"},{"zone":{"id":"7a85d50e-b524-41c1-a052-118027eb77db","zone_type":"ntp","addresses":["fd00:1122:3344:103::e"],"dataset":null,"services":[{"id":"7a85d50e-b524-41c1-a052-118027eb77db","details":{"type":"internal_ntp","address":"[fd00:1122:3344:103::e]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/b93f880e-c55b-4d6c-9a16-939
d84b628fc/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack2-sled9.json b/sled-agent/tests/old-service-ledgers/rack2-sled9.json new file mode 100644 index 0000000000..36af68759b --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack2-sled9.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"912346a2-d7e6-427e-b373-e8dcbe4fcea9","zone_type":"crucible","addresses":["fd00:1122:3344:105::5"],"dataset":{"id":"912346a2-d7e6-427e-b373-e8dcbe4fcea9","name":{"pool_name":"oxp_b358fb1e-f52a-4a63-9aab-170225509b37","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::5]:32345"},"services":[{"id":"912346a2-d7e6-427e-b373-e8dcbe4fcea9","details":{"type":"crucible","address":"[fd00:1122:3344:105::5]:32345"}}]},"root":"/pool/ext/0ae29053-29a2-489e-a1e6-6aec0ecd05f8/crypt/zone"},{"zone":{"id":"3d420dff-c616-4c7d-bab1-0f9c2b5396bf","zone_type":"crucible","addresses":["fd00:1122:3344:105::a"],"dataset":{"id":"3d420dff-c616-4c7d-bab1-0f9c2b5396bf","name":{"pool_name":"oxp_4eb2e4eb-41d8-496c-9a5a-687d7e004aa4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::a]:32345"},"services":[{"id":"3d420dff-c616-4c7d-bab1-0f9c2b5396bf","details":{"type":"crucible","address":"[fd00:1122:3344:105::a]:32345"}}]},"root":"/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone"},{"zone":{"id":"9c5d88c9-8ff1-4f23-9438-7b81322eaf68","zone_type":"crucible","addresses":["fd00:1122:3344:105::b"],"dataset":{"id":"9c5d88c9-8ff1-4f23-9438-7b81322eaf68","name":{"pool_name":"oxp_aadf48eb-6ff0-40b5-a092-1fdd06c03e11","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::b]:32345"},"services":[{"id":"9c5d88c9-8ff1-4f23-9438-7b81322eaf68","details":{"type":"crucible","address":"[fd00:1122:3344:105::b]:32345"}}]},"root":"/pool/ext/4358f47f-f21e-4cc8-829e-0c7fc2400a59/crypt/zone"},{"zone":{"id":"f9c1deca-1898-429e-8c93-254c7aa7bae6","zone_type":"crucible","addresses":["fd00:1122:3344:105::8"],"dataset":{"id":"f9c1deca-1898-429e-8c93-254c7aa7bae6","name":{"pool_name":"oxp_d1cb6b7d-2b92-4b7d-8a4d-551987f0277e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::8]:32345"},"services":[{"id":"f9c1deca-1898-429e-8c93-254c7aa7bae6","details":{"type":"crucible","address":"[fd00:1122:3344:105::8]:32345"}}]},"root":"/pool/ext/f8b11629-ced6-412a-9c3f-d169b99ee996/crypt/zone"},{"zone":{"id":"ce8563f3-4a93-45ff-b727-cbfbee6aa413","zone_type":"crucible","addresses":["fd00:1122:3344:105::9"],"dataset":{"id":"ce8563f3-4a93-45ff-b727-cbfbee6aa413","name":{"pool_name":"oxp_4358f47f-f21e-4cc8-829e-0c7fc2400a59","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::9]:32345"},"services":[{"id":"ce8563f3-4a93-45ff-b727-cbfbee6aa413","details":{"type":"crucible","address":"[fd00:1122:3344:105::9]:32345"}}]},"root":"/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone"},{"zone":{"id":"9470ea7d-1920-4b4b-8fca-e7659a1ef733","zone_type":"crucible","addresses":["fd00:1122:3344:105::c"],"dataset":{"id":"9470ea7d-1920-4b4b-8fca-e7659a1ef733","name":{"pool_name":"oxp_17eff217-f0b1-4353-b133-0f68bbd5ceaa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::c]:32345"},"services":[{"id":"9470ea7d-1920-4b4b-8fca-e7659a1ef733","details":{"type":"crucible","address":"[fd00:1122:3344:105::c]:32345"}}]},"root":"/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone"},{"zone":{"id":"375296e5-0a23-466c-b605-4204080f8103","zone_type":"crucible_pantry","addresses":["fd00:1122:3344:105::4"],"dataset":null,"services"
:[{"id":"375296e5-0a23-466c-b605-4204080f8103","details":{"type":"crucible_pantry","address":"[fd00:1122:3344:105::4]:17000"}}]},"root":"/pool/ext/4eb2e4eb-41d8-496c-9a5a-687d7e004aa4/crypt/zone"},{"zone":{"id":"f9940969-b0e8-4e8c-86c7-4bc49cd15a5f","zone_type":"crucible","addresses":["fd00:1122:3344:105::7"],"dataset":{"id":"f9940969-b0e8-4e8c-86c7-4bc49cd15a5f","name":{"pool_name":"oxp_f8b11629-ced6-412a-9c3f-d169b99ee996","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::7]:32345"},"services":[{"id":"f9940969-b0e8-4e8c-86c7-4bc49cd15a5f","details":{"type":"crucible","address":"[fd00:1122:3344:105::7]:32345"}}]},"root":"/pool/ext/17eff217-f0b1-4353-b133-0f68bbd5ceaa/crypt/zone"},{"zone":{"id":"23dca27d-c79b-4930-a817-392e8aeaa4c1","zone_type":"crucible","addresses":["fd00:1122:3344:105::e"],"dataset":{"id":"23dca27d-c79b-4930-a817-392e8aeaa4c1","name":{"pool_name":"oxp_57650e05-36ff-4de8-865f-b9562bdb67f5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::e]:32345"},"services":[{"id":"23dca27d-c79b-4930-a817-392e8aeaa4c1","details":{"type":"crucible","address":"[fd00:1122:3344:105::e]:32345"}}]},"root":"/pool/ext/0ae29053-29a2-489e-a1e6-6aec0ecd05f8/crypt/zone"},{"zone":{"id":"92d3e4e9-0768-4772-83c1-23cce52190e9","zone_type":"crucible","addresses":["fd00:1122:3344:105::6"],"dataset":{"id":"92d3e4e9-0768-4772-83c1-23cce52190e9","name":{"pool_name":"oxp_eb1234a5-fdf7-4977-94d5-2eef25ce56a1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::6]:32345"},"services":[{"id":"92d3e4e9-0768-4772-83c1-23cce52190e9","details":{"type":"crucible","address":"[fd00:1122:3344:105::6]:32345"}}]},"root":"/pool/ext/b358fb1e-f52a-4a63-9aab-170225509b37/crypt/zone"},{"zone":{"id":"b3e9fee2-24d2-44e7-8539-a6918e85cf2b","zone_type":"crucible","addresses":["fd00:1122:3344:105::d"],"dataset":{"id":"b3e9fee2-24d2-44e7-8539-a6918e85cf2b","name":{"pool_name":"oxp_0ae29053-29a2-489e-a1e6-6aec0ecd05f8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::d]:32345"},"services":[{"id":"b3e9fee2-24d2-44e7-8539-a6918e85cf2b","details":{"type":"crucible","address":"[fd00:1122:3344:105::d]:32345"}}]},"root":"/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone"},{"zone":{"id":"4c3ef132-ec83-4b1b-9574-7c7d3035f9e9","zone_type":"cockroach_db","addresses":["fd00:1122:3344:105::3"],"dataset":{"id":"4c3ef132-ec83-4b1b-9574-7c7d3035f9e9","name":{"pool_name":"oxp_b358fb1e-f52a-4a63-9aab-170225509b37","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:105::3]:32221"},"services":[{"id":"4c3ef132-ec83-4b1b-9574-7c7d3035f9e9","details":{"type":"cockroach_db","address":"[fd00:1122:3344:105::3]:32221"}}]},"root":"/pool/ext/d1cb6b7d-2b92-4b7d-8a4d-551987f0277e/crypt/zone"},{"zone":{"id":"76b79b96-eaa2-4341-9aba-e77cfc92e0a9","zone_type":"ntp","addresses":["fd00:1122:3344:105::f"],"dataset":null,"services":[{"id":"76b79b96-eaa2-4341-9aba-e77cfc92e0a9","details":{"type":"internal_ntp","address":"[fd00:1122:3344:105::f]:123","ntp_servers":["c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal","6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/0ae29053-29a2-489e-a1e6-6aec0ecd05f8/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled0.json b/sled-agent/tests/old-service-ledgers/rack3-sled0.json new file mode 100644 index 0000000000..a853a525bc --- 
/dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled0.json @@ -0,0 +1 @@ +{"generation":4,"requests":[{"zone":{"id":"0710ecea-dbc4-417f-a6f7-1b97c3045db1","zone_type":"crucible","addresses":["fd00:1122:3344:116::6"],"dataset":{"id":"0710ecea-dbc4-417f-a6f7-1b97c3045db1","name":{"pool_name":"oxp_d5313ef5-019c-4c47-bc5e-63794107a1bb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::6]:32345"},"services":[{"id":"0710ecea-dbc4-417f-a6f7-1b97c3045db1","details":{"type":"crucible","address":"[fd00:1122:3344:116::6]:32345"}}]},"root":"/pool/ext/904e93a9-d175-4a20-9006-8c1e847aecf7/crypt/zone"},{"zone":{"id":"28b29d14-d55f-4b55-bbc1-f66e46ae3e70","zone_type":"crucible","addresses":["fd00:1122:3344:116::9"],"dataset":{"id":"28b29d14-d55f-4b55-bbc1-f66e46ae3e70","name":{"pool_name":"oxp_60755ffe-e9ee-4619-a751-8b3ea6405e67","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::9]:32345"},"services":[{"id":"28b29d14-d55f-4b55-bbc1-f66e46ae3e70","details":{"type":"crucible","address":"[fd00:1122:3344:116::9]:32345"}}]},"root":"/pool/ext/d5313ef5-019c-4c47-bc5e-63794107a1bb/crypt/zone"},{"zone":{"id":"6f8f9fd2-b139-4069-a7e2-8d40efd58f6c","zone_type":"crucible","addresses":["fd00:1122:3344:116::d"],"dataset":{"id":"6f8f9fd2-b139-4069-a7e2-8d40efd58f6c","name":{"pool_name":"oxp_ccd2cb0b-782f-4026-a160-6d1192f04ca3","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::d]:32345"},"services":[{"id":"6f8f9fd2-b139-4069-a7e2-8d40efd58f6c","details":{"type":"crucible","address":"[fd00:1122:3344:116::d]:32345"}}]},"root":"/pool/ext/d5313ef5-019c-4c47-bc5e-63794107a1bb/crypt/zone"},{"zone":{"id":"450308ad-bf4d-40ff-ba62-f3290f7fffaf","zone_type":"crucible","addresses":["fd00:1122:3344:116::4"],"dataset":{"id":"450308ad-bf4d-40ff-ba62-f3290f7fffaf","name":{"pool_name":"oxp_46b09442-65ba-4d59-9121-9803fe3b724b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::4]:32345"},"services":[{"id":"450308ad-bf4d-40ff-ba62-f3290f7fffaf","details":{"type":"crucible","address":"[fd00:1122:3344:116::4]:32345"}}]},"root":"/pool/ext/54d901cc-f75e-417d-8a9f-24363136d0ef/crypt/zone"},{"zone":{"id":"9a22bbaa-eab4-4a32-8546-9882dc029483","zone_type":"crucible","addresses":["fd00:1122:3344:116::8"],"dataset":{"id":"9a22bbaa-eab4-4a32-8546-9882dc029483","name":{"pool_name":"oxp_93e3f350-75a0-4af0-bdac-baf9b423926f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::8]:32345"},"services":[{"id":"9a22bbaa-eab4-4a32-8546-9882dc029483","details":{"type":"crucible","address":"[fd00:1122:3344:116::8]:32345"}}]},"root":"/pool/ext/d5313ef5-019c-4c47-bc5e-63794107a1bb/crypt/zone"},{"zone":{"id":"63a9dc49-0b5b-4483-95ed-553b545dc202","zone_type":"crucible","addresses":["fd00:1122:3344:116::a"],"dataset":{"id":"63a9dc49-0b5b-4483-95ed-553b545dc202","name":{"pool_name":"oxp_e3532845-76c0-42a9-903b-a07f7992e937","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::a]:32345"},"services":[{"id":"63a9dc49-0b5b-4483-95ed-553b545dc202","details":{"type":"crucible","address":"[fd00:1122:3344:116::a]:32345"}}]},"root":"/pool/ext/60755ffe-e9ee-4619-a751-8b3ea6405e67/crypt/zone"},{"zone":{"id":"1fef5b6c-78e4-4ad9-9973-9d8c78f1e232","zone_type":"crucible","addresses":["fd00:1122:3344:116::7"],"dataset":{"id":"1fef5b6c-78e4-4ad9-9973-9d8c78f1e232","name":{"pool_name":"oxp_54d901cc-f75e-417d-8a9f-24363136d0ef","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::7]:32345"},"services":[{"id":"1fef5b6c-78e4-4ad9-9973-9d8c78f1e232","details
":{"type":"crucible","address":"[fd00:1122:3344:116::7]:32345"}}]},"root":"/pool/ext/90d7b6f9-3e28-48b0-86ac-0486728075cf/crypt/zone"},{"zone":{"id":"b2aab21a-cccd-4aa9-977f-a32090e6eaa7","zone_type":"crucible","addresses":["fd00:1122:3344:116::5"],"dataset":{"id":"b2aab21a-cccd-4aa9-977f-a32090e6eaa7","name":{"pool_name":"oxp_90d7b6f9-3e28-48b0-86ac-0486728075cf","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::5]:32345"},"services":[{"id":"b2aab21a-cccd-4aa9-977f-a32090e6eaa7","details":{"type":"crucible","address":"[fd00:1122:3344:116::5]:32345"}}]},"root":"/pool/ext/46b09442-65ba-4d59-9121-9803fe3b724b/crypt/zone"},{"zone":{"id":"fc1bbf28-24f3-4c1f-b367-2bc8231eb7d4","zone_type":"crucible","addresses":["fd00:1122:3344:116::b"],"dataset":{"id":"fc1bbf28-24f3-4c1f-b367-2bc8231eb7d4","name":{"pool_name":"oxp_0a7bb0d3-408b-42b1-8846-76cf106a9580","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::b]:32345"},"services":[{"id":"fc1bbf28-24f3-4c1f-b367-2bc8231eb7d4","details":{"type":"crucible","address":"[fd00:1122:3344:116::b]:32345"}}]},"root":"/pool/ext/e3532845-76c0-42a9-903b-a07f7992e937/crypt/zone"},{"zone":{"id":"bcb7617a-f76a-4912-8ccc-802d2a697e3c","zone_type":"crucible","addresses":["fd00:1122:3344:116::c"],"dataset":{"id":"bcb7617a-f76a-4912-8ccc-802d2a697e3c","name":{"pool_name":"oxp_904e93a9-d175-4a20-9006-8c1e847aecf7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:116::c]:32345"},"services":[{"id":"bcb7617a-f76a-4912-8ccc-802d2a697e3c","details":{"type":"crucible","address":"[fd00:1122:3344:116::c]:32345"}}]},"root":"/pool/ext/ccd2cb0b-782f-4026-a160-6d1192f04ca3/crypt/zone"},{"zone":{"id":"371fba3a-658b-469b-b675-c90cc0d39254","zone_type":"cockroach_db","addresses":["fd00:1122:3344:116::3"],"dataset":{"id":"371fba3a-658b-469b-b675-c90cc0d39254","name":{"pool_name":"oxp_46b09442-65ba-4d59-9121-9803fe3b724b","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:116::3]:32221"},"services":[{"id":"371fba3a-658b-469b-b675-c90cc0d39254","details":{"type":"cockroach_db","address":"[fd00:1122:3344:116::3]:32221"}}]},"root":"/pool/ext/46b09442-65ba-4d59-9121-9803fe3b724b/crypt/zone"},{"zone":{"id":"5a4d89f5-49e0-4566-a99c-342d1bb26b1c","zone_type":"ntp","addresses":["fd00:1122:3344:116::e"],"dataset":null,"services":[{"id":"5a4d89f5-49e0-4566-a99c-342d1bb26b1c","details":{"type":"internal_ntp","address":"[fd00:1122:3344:116::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/60755ffe-e9ee-4619-a751-8b3ea6405e67/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled1.json b/sled-agent/tests/old-service-ledgers/rack3-sled1.json new file mode 100644 index 0000000000..bd735e5e64 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled1.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"f401d06c-46fc-42f8-aa51-7515a51355ce","zone_type":"crucible","addresses":["fd00:1122:3344:11c::8"],"dataset":{"id":"f401d06c-46fc-42f8-aa51-7515a51355ce","name":{"pool_name":"oxp_8a88768a-2dd5-43b7-bd40-0db77be4d3a8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::8]:32345"},"services":[{"id":"f401d06c-46fc-42f8-aa51-7515a51355ce","details":{"type":"crucible","address":"[fd00:1122:3344:11c::8]:32345"}}]},"root":"/pool/ext/19d23d27-6a33-4203-b8c1-4b0df4ac791f/crypt/zone"},{"zone":{"id":"721c96ea-08d4-4c89-828f-600e7e344916","zone_type":"crucible","addresses":["fd00:1122:3344:11c::6"],"dataset":{"id":"721c96ea-08d4-4c89-828f-600e7e344916","name":{"pool_name":"oxp_15259003-fb04-4547-b4a9-b4511893c0fd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::6]:32345"},"services":[{"id":"721c96ea-08d4-4c89-828f-600e7e344916","details":{"type":"crucible","address":"[fd00:1122:3344:11c::6]:32345"}}]},"root":"/pool/ext/d2a8ed82-22ef-46d8-ad40-e1cb2cecebee/crypt/zone"},{"zone":{"id":"ca17bdf9-51c5-4e1e-b822-856609070ec6","zone_type":"crucible","addresses":["fd00:1122:3344:11c::5"],"dataset":{"id":"ca17bdf9-51c5-4e1e-b822-856609070ec6","name":{"pool_name":"oxp_d2a8ed82-22ef-46d8-ad40-e1cb2cecebee","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::5]:32345"},"services":[{"id":"ca17bdf9-51c5-4e1e-b822-856609070ec6","details":{"type":"crucible","address":"[fd00:1122:3344:11c::5]:32345"}}]},"root":"/pool/ext/15259003-fb04-4547-b4a9-b4511893c0fd/crypt/zone"},{"zone":{"id":"5825447e-1b5b-4960-b202-e75853d3d250","zone_type":"crucible","addresses":["fd00:1122:3344:11c::9"],"dataset":{"id":"5825447e-1b5b-4960-b202-e75853d3d250","name":{"pool_name":"oxp_04e94454-cbd4-4cee-ad69-42372bcbabd5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::9]:32345"},"services":[{"id":"5825447e-1b5b-4960-b202-e75853d3d250","details":{"type":"crucible","address":"[fd00:1122:3344:11c::9]:32345"}}]},"root":"/pool/ext/542e0fb3-552c-4d3b-b853-da1f13b581a0/crypt/zone"},{"zone":{"id":"b937d3f0-1352-47a2-b9d1-a9ccf9c82b16","zone_type":"crucible","addresses":["fd00:1122:3344:11c::c"],"dataset":{"id":"b937d3f0-1352-47a2-b9d1-a9ccf9c82b16","name":{"pool_name":"oxp_542e0fb3-552c-4d3b-b853-da1f13b581a0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::c]:32345"},"services":[{"id":"b937d3f0-1352-47a2-b9d1-a9ccf9c82b16","details":{"type":"crucible","address":"[fd00:1122:3344:11c::c]:32345"}}]},"root":"/pool/ext/eedd1d58-4892-456f-aaf7-9d650c7921ca/crypt/zone"},{"zone":{"id":"d63a677b-8dac-44ee-89a2-cc4cb151254d","zone_type":"crucible","addresses":["fd00:1122:3344:11c::3"],"dataset":{"id":"d63a677b-8dac-44ee-89a2-cc4cb151254d","name":{"pool_name":"oxp_45b5f1ee-7b66-4d74-8364-54fa0c73775f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::3]:32345"},"services":[{"id":"d63a677b-8dac-44ee-89a2-cc4cb151254d","details":{"type":"crucible","address":"[fd00:1122:3344:11c::3]:32345"}}]},"root":"/pool/ext/8a88768a-2dd5-43b7-bd40-0db77be4d3a8/crypt/zone"},{"zone":{"id":"abcb92ea-9f17-4cd8-897b-9d0d1ef7903a","zone_type":"crucible","addresses":["fd00:1122:3344:11c::4"],"dataset":{"id":"abcb92ea-9f17-4cd8-897b-9d0d1ef7903a","name":{"pool_name":"oxp_341d49db-c06a-416d-90e1-b0a3426ed02e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::4]:32345"},"services":[{"id":"abcb92ea-9f17-4cd8-897b-9d0d1ef7903a","details":{"type":"crucible","address":"[fd00:1122:3344:11c::4]:32345"}}]},"root":"/pool/ext
/eedd1d58-4892-456f-aaf7-9d650c7921ca/crypt/zone"},{"zone":{"id":"000ac89d-db07-47ae-83cf-d9cafef013de","zone_type":"crucible","addresses":["fd00:1122:3344:11c::b"],"dataset":{"id":"000ac89d-db07-47ae-83cf-d9cafef013de","name":{"pool_name":"oxp_eedd1d58-4892-456f-aaf7-9d650c7921ca","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::b]:32345"},"services":[{"id":"000ac89d-db07-47ae-83cf-d9cafef013de","details":{"type":"crucible","address":"[fd00:1122:3344:11c::b]:32345"}}]},"root":"/pool/ext/04e94454-cbd4-4cee-ad69-42372bcbabd5/crypt/zone"},{"zone":{"id":"29e1e2e4-695e-4c05-8f0c-c16a0a61d390","zone_type":"crucible","addresses":["fd00:1122:3344:11c::7"],"dataset":{"id":"29e1e2e4-695e-4c05-8f0c-c16a0a61d390","name":{"pool_name":"oxp_19d23d27-6a33-4203-b8c1-4b0df4ac791f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::7]:32345"},"services":[{"id":"29e1e2e4-695e-4c05-8f0c-c16a0a61d390","details":{"type":"crucible","address":"[fd00:1122:3344:11c::7]:32345"}}]},"root":"/pool/ext/d2a8ed82-22ef-46d8-ad40-e1cb2cecebee/crypt/zone"},{"zone":{"id":"9fa7d7be-a6de-4d36-b56b-d1cc5ca7c82c","zone_type":"crucible","addresses":["fd00:1122:3344:11c::a"],"dataset":{"id":"9fa7d7be-a6de-4d36-b56b-d1cc5ca7c82c","name":{"pool_name":"oxp_0fd7a0b1-ed4b-4dc6-8c44-a49c9628c7e1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11c::a]:32345"},"services":[{"id":"9fa7d7be-a6de-4d36-b56b-d1cc5ca7c82c","details":{"type":"crucible","address":"[fd00:1122:3344:11c::a]:32345"}}]},"root":"/pool/ext/d2a8ed82-22ef-46d8-ad40-e1cb2cecebee/crypt/zone"},{"zone":{"id":"249db5f1-45e2-4a5c-a91f-cc51dbd87040","zone_type":"ntp","addresses":["fd00:1122:3344:11c::d"],"dataset":null,"services":[{"id":"249db5f1-45e2-4a5c-a91f-cc51dbd87040","details":{"type":"internal_ntp","address":"[fd00:1122:3344:11c::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/542e0fb3-552c-4d3b-b853-da1f13b581a0/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled11.json b/sled-agent/tests/old-service-ledgers/rack3-sled11.json new file mode 100644 index 0000000000..2918c74c4b --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled11.json @@ -0,0 +1 @@ 
+{"generation":5,"requests":[{"zone":{"id":"7ddd0738-59df-4b67-a41e-7f0de9827187","zone_type":"crucible","addresses":["fd00:1122:3344:11e::4"],"dataset":{"id":"7ddd0738-59df-4b67-a41e-7f0de9827187","name":{"pool_name":"oxp_09af632a-6b1b-4a18-8c91-d392da38b02f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::4]:32345"},"services":[{"id":"7ddd0738-59df-4b67-a41e-7f0de9827187","details":{"type":"crucible","address":"[fd00:1122:3344:11e::4]:32345"}}]},"root":"/pool/ext/09af632a-6b1b-4a18-8c91-d392da38b02f/crypt/zone"},{"zone":{"id":"9706189f-713a-4394-b5dc-45dcf67dc46e","zone_type":"crucible","addresses":["fd00:1122:3344:11e::9"],"dataset":{"id":"9706189f-713a-4394-b5dc-45dcf67dc46e","name":{"pool_name":"oxp_4e1837c8-91ab-4d1d-abfd-f5144d88535e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::9]:32345"},"services":[{"id":"9706189f-713a-4394-b5dc-45dcf67dc46e","details":{"type":"crucible","address":"[fd00:1122:3344:11e::9]:32345"}}]},"root":"/pool/ext/2f0d47cb-28d1-4350-8656-60c6121f773b/crypt/zone"},{"zone":{"id":"7bdd841b-5e34-4c19-9066-b12578651446","zone_type":"crucible","addresses":["fd00:1122:3344:11e::a"],"dataset":{"id":"7bdd841b-5e34-4c19-9066-b12578651446","name":{"pool_name":"oxp_78d1e7f7-8d11-4fed-8b1e-be58908aea2f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::a]:32345"},"services":[{"id":"7bdd841b-5e34-4c19-9066-b12578651446","details":{"type":"crucible","address":"[fd00:1122:3344:11e::a]:32345"}}]},"root":"/pool/ext/62c23f4b-8e7b-4cd8-9055-19c1d8bd5ac8/crypt/zone"},{"zone":{"id":"74c0f60b-de5f-4456-a85f-f992a6e10424","zone_type":"crucible","addresses":["fd00:1122:3344:11e::b"],"dataset":{"id":"74c0f60b-de5f-4456-a85f-f992a6e10424","name":{"pool_name":"oxp_3b81d709-bf10-4dd7-a2c0-759d8acc2da0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::b]:32345"},"services":[{"id":"74c0f60b-de5f-4456-a85f-f992a6e10424","details":{"type":"crucible","address":"[fd00:1122:3344:11e::b]:32345"}}]},"root":"/pool/ext/09af632a-6b1b-4a18-8c91-d392da38b02f/crypt/zone"},{"zone":{"id":"da81ce6f-bd38-440e-b966-8a743092fa21","zone_type":"crucible","addresses":["fd00:1122:3344:11e::6"],"dataset":{"id":"da81ce6f-bd38-440e-b966-8a743092fa21","name":{"pool_name":"oxp_62c23f4b-8e7b-4cd8-9055-19c1d8bd5ac8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::6]:32345"},"services":[{"id":"da81ce6f-bd38-440e-b966-8a743092fa21","details":{"type":"crucible","address":"[fd00:1122:3344:11e::6]:32345"}}]},"root":"/pool/ext/215dd02b-0de6-488a-9e65-5e588cd079fb/crypt/zone"},{"zone":{"id":"febbca37-5279-400f-a2e9-6b5271b2d2fc","zone_type":"crucible","addresses":["fd00:1122:3344:11e::7"],"dataset":{"id":"febbca37-5279-400f-a2e9-6b5271b2d2fc","name":{"pool_name":"oxp_fb33e773-fb93-41a0-8078-b653b9078dda","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::7]:32345"},"services":[{"id":"febbca37-5279-400f-a2e9-6b5271b2d2fc","details":{"type":"crucible","address":"[fd00:1122:3344:11e::7]:32345"}}]},"root":"/pool/ext/2f0d47cb-28d1-4350-8656-60c6121f773b/crypt/zone"},{"zone":{"id":"5100e222-5ea4-4e67-9040-679137e666c8","zone_type":"crucible","addresses":["fd00:1122:3344:11e::5"],"dataset":{"id":"5100e222-5ea4-4e67-9040-679137e666c8","name":{"pool_name":"oxp_23767587-2253-431b-8944-18b9bfefcb3d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::5]:32345"},"services":[{"id":"5100e222-5ea4-4e67-9040-679137e666c8","details":{"type":"crucible","address":"[fd00:1122:3344:11e::5]:32345"}}]},"root":"/pool/ext
/3b81d709-bf10-4dd7-a2c0-759d8acc2da0/crypt/zone"},{"zone":{"id":"c7ec3bc8-08ca-4901-a45e-0d68db72c6a7","zone_type":"crucible","addresses":["fd00:1122:3344:11e::3"],"dataset":{"id":"c7ec3bc8-08ca-4901-a45e-0d68db72c6a7","name":{"pool_name":"oxp_2f0d47cb-28d1-4350-8656-60c6121f773b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::3]:32345"},"services":[{"id":"c7ec3bc8-08ca-4901-a45e-0d68db72c6a7","details":{"type":"crucible","address":"[fd00:1122:3344:11e::3]:32345"}}]},"root":"/pool/ext/215dd02b-0de6-488a-9e65-5e588cd079fb/crypt/zone"},{"zone":{"id":"1fc80dd3-0fd9-4403-96bd-5bbf9eb0f15a","zone_type":"crucible","addresses":["fd00:1122:3344:11e::c"],"dataset":{"id":"1fc80dd3-0fd9-4403-96bd-5bbf9eb0f15a","name":{"pool_name":"oxp_2c932d54-41fb-4ffe-a57f-0479b9e5841e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::c]:32345"},"services":[{"id":"1fc80dd3-0fd9-4403-96bd-5bbf9eb0f15a","details":{"type":"crucible","address":"[fd00:1122:3344:11e::c]:32345"}}]},"root":"/pool/ext/3b81d709-bf10-4dd7-a2c0-759d8acc2da0/crypt/zone"},{"zone":{"id":"4eacc68d-5699-440a-ab33-c75f259e4cc3","zone_type":"crucible","addresses":["fd00:1122:3344:11e::8"],"dataset":{"id":"4eacc68d-5699-440a-ab33-c75f259e4cc3","name":{"pool_name":"oxp_215dd02b-0de6-488a-9e65-5e588cd079fb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11e::8]:32345"},"services":[{"id":"4eacc68d-5699-440a-ab33-c75f259e4cc3","details":{"type":"crucible","address":"[fd00:1122:3344:11e::8]:32345"}}]},"root":"/pool/ext/4e1837c8-91ab-4d1d-abfd-f5144d88535e/crypt/zone"},{"zone":{"id":"cb901d3e-8811-4c4c-a274-a44130501ecf","zone_type":"ntp","addresses":["fd00:1122:3344:11e::d"],"dataset":null,"services":[{"id":"cb901d3e-8811-4c4c-a274-a44130501ecf","details":{"type":"boundary_ntp","address":"[fd00:1122:3344:11e::d]:123","ntp_servers":["time.cloudflare.com"],"dns_servers":["1.1.1.1","8.8.8.8"],"domain":null,"nic":{"id":"bcf9d9eb-b4ba-4fd5-91e0-55a3414ae049","kind":{"type":"service","id":"cb901d3e-8811-4c4c-a274-a44130501ecf"},"name":"ntp-cb901d3e-8811-4c4c-a274-a44130501ecf","ip":"172.30.3.6","mac":"A8:40:25:FF:D5:2F","subnet":"172.30.3.0/24","vni":100,"primary":true,"slot":0},"snat_cfg":{"ip":"45.154.216.39","first_port":16384,"last_port":32767}}}]},"root":"/pool/ext/23767587-2253-431b-8944-18b9bfefcb3d/crypt/zone"},{"zone":{"id":"be4aada9-d160-401d-a630-a0764c039702","zone_type":"internal_dns","addresses":["fd00:1122:3344:2::1"],"dataset":{"id":"be4aada9-d160-401d-a630-a0764c039702","name":{"pool_name":"oxp_2f0d47cb-28d1-4350-8656-60c6121f773b","kind":{"type":"internal_dns"}},"service_address":"[fd00:1122:3344:2::1]:5353"},"services":[{"id":"be4aada9-d160-401d-a630-a0764c039702","details":{"type":"internal_dns","http_address":"[fd00:1122:3344:2::1]:5353","dns_address":"[fd00:1122:3344:2::1]:53","gz_address":"fd00:1122:3344:2::2","gz_address_index":1}}]},"root":"/pool/ext/78d1e7f7-8d11-4fed-8b1e-be58908aea2f/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled12.json b/sled-agent/tests/old-service-ledgers/rack3-sled12.json new file mode 100644 index 0000000000..c81f586e01 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled12.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"d8f1b9d2-fa2e-4f03-bbea-2039448d7792","zone_type":"crucible","addresses":["fd00:1122:3344:112::5"],"dataset":{"id":"d8f1b9d2-fa2e-4f03-bbea-2039448d7792","name":{"pool_name":"oxp_7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::5]:32345"},"services":[{"id":"d8f1b9d2-fa2e-4f03-bbea-2039448d7792","details":{"type":"crucible","address":"[fd00:1122:3344:112::5]:32345"}}]},"root":"/pool/ext/78d9f0ae-8e7f-450e-abc2-76b983efa5cd/crypt/zone"},{"zone":{"id":"2074a935-c0b3-4c4f-aae5-a29adae3e1ac","zone_type":"crucible","addresses":["fd00:1122:3344:112::8"],"dataset":{"id":"2074a935-c0b3-4c4f-aae5-a29adae3e1ac","name":{"pool_name":"oxp_ac663368-45fb-447c-811e-561c68e37bdd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::8]:32345"},"services":[{"id":"2074a935-c0b3-4c4f-aae5-a29adae3e1ac","details":{"type":"crucible","address":"[fd00:1122:3344:112::8]:32345"}}]},"root":"/pool/ext/ac663368-45fb-447c-811e-561c68e37bdd/crypt/zone"},{"zone":{"id":"2885d3c7-ad7d-445c-8630-dc6c81f8caa0","zone_type":"crucible","addresses":["fd00:1122:3344:112::a"],"dataset":{"id":"2885d3c7-ad7d-445c-8630-dc6c81f8caa0","name":{"pool_name":"oxp_8e82e8da-e1c5-4867-bc1c-b5441f9c1010","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::a]:32345"},"services":[{"id":"2885d3c7-ad7d-445c-8630-dc6c81f8caa0","details":{"type":"crucible","address":"[fd00:1122:3344:112::a]:32345"}}]},"root":"/pool/ext/8e82e8da-e1c5-4867-bc1c-b5441f9c1010/crypt/zone"},{"zone":{"id":"1eca241b-6868-4c59-876b-58356654f3b5","zone_type":"crucible","addresses":["fd00:1122:3344:112::c"],"dataset":{"id":"1eca241b-6868-4c59-876b-58356654f3b5","name":{"pool_name":"oxp_fde16c69-aa47-4a15-bb3f-3a5861ae45bd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::c]:32345"},"services":[{"id":"1eca241b-6868-4c59-876b-58356654f3b5","details":{"type":"crucible","address":"[fd00:1122:3344:112::c]:32345"}}]},"root":"/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone"},{"zone":{"id":"cc656f2e-8542-4986-8524-2f55984939c1","zone_type":"crucible","addresses":["fd00:1122:3344:112::d"],"dataset":{"id":"cc656f2e-8542-4986-8524-2f55984939c1","name":{"pool_name":"oxp_21e6d0f9-887e-4d6f-9a00-4cd61139eea6","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::d]:32345"},"services":[{"id":"cc656f2e-8542-4986-8524-2f55984939c1","details":{"type":"crucible","address":"[fd00:1122:3344:112::d]:32345"}}]},"root":"/pool/ext/21e6d0f9-887e-4d6f-9a00-4cd61139eea6/crypt/zone"},{"zone":{"id":"dfb1ebce-a4c7-4b50-9435-9a79b884c1af","zone_type":"clickhouse","addresses":["fd00:1122:3344:112::3"],"dataset":{"id":"dfb1ebce-a4c7-4b50-9435-9a79b884c1af","name":{"pool_name":"oxp_4f045315-de51-46ed-a011-16496615278f","kind":{"type":"clickhouse"}},"service_address":"[fd00:1122:3344:112::3]:8123"},"services":[{"id":"dfb1ebce-a4c7-4b50-9435-9a79b884c1af","details":{"type":"clickhouse","address":"[fd00:1122:3344:112::3]:8123"}}]},"root":"/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone"},{"zone":{"id":"a95d90ed-b2b1-4a5d-8d0d-4195b34bc764","zone_type":"crucible","addresses":["fd00:1122:3344:112::6"],"dataset":{"id":"a95d90ed-b2b1-4a5d-8d0d-4195b34bc764","name":{"pool_name":"oxp_d2c77c69-14d7-442e-8b47-a0d7af5a0e7e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::6]:32345"},"services":[{"id":"a95d90ed-b2b1-4a5d-8d0d-4195b34bc764","details":{"type":"crucible","address":"[fd00:1122:3344:112::6]:32345"}}]},"root":"/pool
/ext/fad56ff1-ad9f-4215-b584-522eab18cf7b/crypt/zone"},{"zone":{"id":"1d3ebc90-d5a5-4cb0-ae90-50bb2163ae13","zone_type":"crucible","addresses":["fd00:1122:3344:112::b"],"dataset":{"id":"1d3ebc90-d5a5-4cb0-ae90-50bb2163ae13","name":{"pool_name":"oxp_fad56ff1-ad9f-4215-b584-522eab18cf7b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::b]:32345"},"services":[{"id":"1d3ebc90-d5a5-4cb0-ae90-50bb2163ae13","details":{"type":"crucible","address":"[fd00:1122:3344:112::b]:32345"}}]},"root":"/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone"},{"zone":{"id":"7af9f38b-0c7a-402e-8db3-7c7fb50b4665","zone_type":"crucible","addresses":["fd00:1122:3344:112::9"],"dataset":{"id":"7af9f38b-0c7a-402e-8db3-7c7fb50b4665","name":{"pool_name":"oxp_d0693580-5c5a-449f-803f-ce7188ebc580","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::9]:32345"},"services":[{"id":"7af9f38b-0c7a-402e-8db3-7c7fb50b4665","details":{"type":"crucible","address":"[fd00:1122:3344:112::9]:32345"}}]},"root":"/pool/ext/d2c77c69-14d7-442e-8b47-a0d7af5a0e7e/crypt/zone"},{"zone":{"id":"94d9bb0a-ecd2-4501-b960-60982f55ad12","zone_type":"crucible","addresses":["fd00:1122:3344:112::7"],"dataset":{"id":"94d9bb0a-ecd2-4501-b960-60982f55ad12","name":{"pool_name":"oxp_78d9f0ae-8e7f-450e-abc2-76b983efa5cd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::7]:32345"},"services":[{"id":"94d9bb0a-ecd2-4501-b960-60982f55ad12","details":{"type":"crucible","address":"[fd00:1122:3344:112::7]:32345"}}]},"root":"/pool/ext/ac663368-45fb-447c-811e-561c68e37bdd/crypt/zone"},{"zone":{"id":"277c1105-576e-4ec1-8e2c-cbae2f5ac9f6","zone_type":"crucible","addresses":["fd00:1122:3344:112::4"],"dataset":{"id":"277c1105-576e-4ec1-8e2c-cbae2f5ac9f6","name":{"pool_name":"oxp_4f045315-de51-46ed-a011-16496615278f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:112::4]:32345"},"services":[{"id":"277c1105-576e-4ec1-8e2c-cbae2f5ac9f6","details":{"type":"crucible","address":"[fd00:1122:3344:112::4]:32345"}}]},"root":"/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone"},{"zone":{"id":"555c3407-a76c-4ea4-a17a-a670d85a59b0","zone_type":"ntp","addresses":["fd00:1122:3344:112::e"],"dataset":null,"services":[{"id":"555c3407-a76c-4ea4-a17a-a670d85a59b0","details":{"type":"internal_ntp","address":"[fd00:1122:3344:112::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/8e82e8da-e1c5-4867-bc1c-b5441f9c1010/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled13.json b/sled-agent/tests/old-service-ledgers/rack3-sled13.json new file mode 100644 index 0000000000..ab151a828e --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled13.json @@ -0,0 +1 @@ 
+{"generation":5,"requests":[{"zone":{"id":"fbcf51c9-a732-4a03-8c19-cfb5b819cb7a","zone_type":"crucible","addresses":["fd00:1122:3344:104::5"],"dataset":{"id":"fbcf51c9-a732-4a03-8c19-cfb5b819cb7a","name":{"pool_name":"oxp_382a2961-cd27-4a9c-901d-468a45ff5708","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::5]:32345"},"services":[{"id":"fbcf51c9-a732-4a03-8c19-cfb5b819cb7a","details":{"type":"crucible","address":"[fd00:1122:3344:104::5]:32345"}}]},"root":"/pool/ext/e99994ae-61ca-4742-a02c-eb0a8a5b69ff/crypt/zone"},{"zone":{"id":"7f8a5026-1f1d-4ab3-8c04-077bfda2f815","zone_type":"crucible","addresses":["fd00:1122:3344:104::4"],"dataset":{"id":"7f8a5026-1f1d-4ab3-8c04-077bfda2f815","name":{"pool_name":"oxp_9c99b9b6-8018-455e-a58a-c048ddd3e11b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::4]:32345"},"services":[{"id":"7f8a5026-1f1d-4ab3-8c04-077bfda2f815","details":{"type":"crucible","address":"[fd00:1122:3344:104::4]:32345"}}]},"root":"/pool/ext/22c79e54-37ef-4ad2-a6cb-a7ee3e4f7167/crypt/zone"},{"zone":{"id":"6d45d856-0e49-4eb7-ad76-989a9ae636a2","zone_type":"crucible","addresses":["fd00:1122:3344:104::3"],"dataset":{"id":"6d45d856-0e49-4eb7-ad76-989a9ae636a2","name":{"pool_name":"oxp_b74a84fa-b4c8-4c5f-92f4-f4e62a0a311d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::3]:32345"},"services":[{"id":"6d45d856-0e49-4eb7-ad76-989a9ae636a2","details":{"type":"crucible","address":"[fd00:1122:3344:104::3]:32345"}}]},"root":"/pool/ext/9c99b9b6-8018-455e-a58a-c048ddd3e11b/crypt/zone"},{"zone":{"id":"c8dc7fff-72c8-49eb-a552-d605f8655134","zone_type":"crucible","addresses":["fd00:1122:3344:104::6"],"dataset":{"id":"c8dc7fff-72c8-49eb-a552-d605f8655134","name":{"pool_name":"oxp_22c79e54-37ef-4ad2-a6cb-a7ee3e4f7167","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::6]:32345"},"services":[{"id":"c8dc7fff-72c8-49eb-a552-d605f8655134","details":{"type":"crucible","address":"[fd00:1122:3344:104::6]:32345"}}]},"root":"/pool/ext/22c79e54-37ef-4ad2-a6cb-a7ee3e4f7167/crypt/zone"},{"zone":{"id":"128a90f5-8889-4665-8343-2c7098f2922c","zone_type":"crucible","addresses":["fd00:1122:3344:104::7"],"dataset":{"id":"128a90f5-8889-4665-8343-2c7098f2922c","name":{"pool_name":"oxp_8b3d0b51-c6a5-4d2c-827a-0d0d1471136d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::7]:32345"},"services":[{"id":"128a90f5-8889-4665-8343-2c7098f2922c","details":{"type":"crucible","address":"[fd00:1122:3344:104::7]:32345"}}]},"root":"/pool/ext/29cd042b-e772-4d26-ac85-ef16009950bd/crypt/zone"},{"zone":{"id":"a72f1878-3b03-4267-9024-5df5ebae69de","zone_type":"crucible","addresses":["fd00:1122:3344:104::a"],"dataset":{"id":"a72f1878-3b03-4267-9024-5df5ebae69de","name":{"pool_name":"oxp_e99994ae-61ca-4742-a02c-eb0a8a5b69ff","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::a]:32345"},"services":[{"id":"a72f1878-3b03-4267-9024-5df5ebae69de","details":{"type":"crucible","address":"[fd00:1122:3344:104::a]:32345"}}]},"root":"/pool/ext/8b3d0b51-c6a5-4d2c-827a-0d0d1471136d/crypt/zone"},{"zone":{"id":"6a9165a2-9b66-485a-aaf0-70d89d60bb6c","zone_type":"crucible","addresses":["fd00:1122:3344:104::b"],"dataset":{"id":"6a9165a2-9b66-485a-aaf0-70d89d60bb6c","name":{"pool_name":"oxp_6a02f05f-e400-4c80-8df8-89aaecb6c12b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::b]:32345"},"services":[{"id":"6a9165a2-9b66-485a-aaf0-70d89d60bb6c","details":{"type":"crucible","address":"[fd00:1122:3344:104::b]:32345"}}]},"root":"/pool/ext
/9c99b9b6-8018-455e-a58a-c048ddd3e11b/crypt/zone"},{"zone":{"id":"9677c4ed-96bc-4dcb-ae74-f7a3e9d2b5e2","zone_type":"crucible","addresses":["fd00:1122:3344:104::c"],"dataset":{"id":"9677c4ed-96bc-4dcb-ae74-f7a3e9d2b5e2","name":{"pool_name":"oxp_7c30978f-ee87-4e53-8fdf-3455e5e851b7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::c]:32345"},"services":[{"id":"9677c4ed-96bc-4dcb-ae74-f7a3e9d2b5e2","details":{"type":"crucible","address":"[fd00:1122:3344:104::c]:32345"}}]},"root":"/pool/ext/29cd042b-e772-4d26-ac85-ef16009950bd/crypt/zone"},{"zone":{"id":"179039e7-3ffd-4b76-9379-bef41d42a5ff","zone_type":"crucible","addresses":["fd00:1122:3344:104::8"],"dataset":{"id":"179039e7-3ffd-4b76-9379-bef41d42a5ff","name":{"pool_name":"oxp_4db7e002-e112-4bfc-a41e-8ae26991b01e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::8]:32345"},"services":[{"id":"179039e7-3ffd-4b76-9379-bef41d42a5ff","details":{"type":"crucible","address":"[fd00:1122:3344:104::8]:32345"}}]},"root":"/pool/ext/8b3d0b51-c6a5-4d2c-827a-0d0d1471136d/crypt/zone"},{"zone":{"id":"6067e31e-b6a3-4114-9e49-0296adc8e7af","zone_type":"crucible","addresses":["fd00:1122:3344:104::9"],"dataset":{"id":"6067e31e-b6a3-4114-9e49-0296adc8e7af","name":{"pool_name":"oxp_29cd042b-e772-4d26-ac85-ef16009950bd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:104::9]:32345"},"services":[{"id":"6067e31e-b6a3-4114-9e49-0296adc8e7af","details":{"type":"crucible","address":"[fd00:1122:3344:104::9]:32345"}}]},"root":"/pool/ext/9c99b9b6-8018-455e-a58a-c048ddd3e11b/crypt/zone"},{"zone":{"id":"440dd615-e11f-4a5d-aeb4-dcf88bb314de","zone_type":"ntp","addresses":["fd00:1122:3344:104::d"],"dataset":null,"services":[{"id":"440dd615-e11f-4a5d-aeb4-dcf88bb314de","details":{"type":"boundary_ntp","address":"[fd00:1122:3344:104::d]:123","ntp_servers":["time.cloudflare.com"],"dns_servers":["1.1.1.1","8.8.8.8"],"domain":null,"nic":{"id":"0b52fe1b-f4cc-43b1-9ac3-4ebb4ab60133","kind":{"type":"service","id":"440dd615-e11f-4a5d-aeb4-dcf88bb314de"},"name":"ntp-440dd615-e11f-4a5d-aeb4-dcf88bb314de","ip":"172.30.3.5","mac":"A8:40:25:FF:85:1E","subnet":"172.30.3.0/24","vni":100,"primary":true,"slot":0},"snat_cfg":{"ip":"45.154.216.38","first_port":0,"last_port":16383}}}]},"root":"/pool/ext/382a2961-cd27-4a9c-901d-468a45ff5708/crypt/zone"},{"zone":{"id":"06e2de03-bd92-404c-a8ea-a13185539d24","zone_type":"internal_dns","addresses":["fd00:1122:3344:1::1"],"dataset":{"id":"06e2de03-bd92-404c-a8ea-a13185539d24","name":{"pool_name":"oxp_b74a84fa-b4c8-4c5f-92f4-f4e62a0a311d","kind":{"type":"internal_dns"}},"service_address":"[fd00:1122:3344:1::1]:5353"},"services":[{"id":"06e2de03-bd92-404c-a8ea-a13185539d24","details":{"type":"internal_dns","http_address":"[fd00:1122:3344:1::1]:5353","dns_address":"[fd00:1122:3344:1::1]:53","gz_address":"fd00:1122:3344:1::2","gz_address_index":0}}]},"root":"/pool/ext/e99994ae-61ca-4742-a02c-eb0a8a5b69ff/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled14.json b/sled-agent/tests/old-service-ledgers/rack3-sled14.json new file mode 100644 index 0000000000..89c12a015f --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled14.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"ac35afab-a312-43c3-a42d-04b8e99fcbde","zone_type":"crucible","addresses":["fd00:1122:3344:111::4"],"dataset":{"id":"ac35afab-a312-43c3-a42d-04b8e99fcbde","name":{"pool_name":"oxp_6601065c-c172-4118-81b4-16adde7e9401","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::4]:32345"},"services":[{"id":"ac35afab-a312-43c3-a42d-04b8e99fcbde","details":{"type":"crucible","address":"[fd00:1122:3344:111::4]:32345"}}]},"root":"/pool/ext/24d7e250-9fc6-459e-8155-30f8e8ccb28c/crypt/zone"},{"zone":{"id":"6cd94da2-35b9-4683-a931-29ad4a5ed0ef","zone_type":"crucible","addresses":["fd00:1122:3344:111::c"],"dataset":{"id":"6cd94da2-35b9-4683-a931-29ad4a5ed0ef","name":{"pool_name":"oxp_58276eba-a53c-4ef3-b374-4cdcde4d6e12","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::c]:32345"},"services":[{"id":"6cd94da2-35b9-4683-a931-29ad4a5ed0ef","details":{"type":"crucible","address":"[fd00:1122:3344:111::c]:32345"}}]},"root":"/pool/ext/24d7e250-9fc6-459e-8155-30f8e8ccb28c/crypt/zone"},{"zone":{"id":"41f07d39-fcc0-4796-8b7c-7cfcd9135f78","zone_type":"crucible","addresses":["fd00:1122:3344:111::9"],"dataset":{"id":"41f07d39-fcc0-4796-8b7c-7cfcd9135f78","name":{"pool_name":"oxp_4b90abdc-3348-4158-bedc-5bcd56e281d8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::9]:32345"},"services":[{"id":"41f07d39-fcc0-4796-8b7c-7cfcd9135f78","details":{"type":"crucible","address":"[fd00:1122:3344:111::9]:32345"}}]},"root":"/pool/ext/8e955f54-fbef-4021-9eec-457825468813/crypt/zone"},{"zone":{"id":"44c35566-dd64-4e4a-896e-c50aaa3df14f","zone_type":"nexus","addresses":["fd00:1122:3344:111::3"],"dataset":null,"services":[{"id":"44c35566-dd64-4e4a-896e-c50aaa3df14f","details":{"type":"nexus","internal_address":"[fd00:1122:3344:111::3]:12221","external_ip":"45.154.216.37","nic":{"id":"6f824d20-6ce0-4e8b-9ce3-b12dd2b59913","kind":{"type":"service","id":"44c35566-dd64-4e4a-896e-c50aaa3df14f"},"name":"nexus-44c35566-dd64-4e4a-896e-c50aaa3df14f","ip":"172.30.2.7","mac":"A8:40:25:FF:E8:5F","subnet":"172.30.2.0/24","vni":100,"primary":true,"slot":0},"external_tls":true,"external_dns_servers":["1.1.1.1","8.8.8.8"]}}]},"root":"/pool/ext/435d7a1b-2865-4d49-903f-a68f464ade4d/crypt/zone"},{"zone":{"id":"e5020d24-8652-456b-bf92-cd7d255a34c5","zone_type":"crucible","addresses":["fd00:1122:3344:111::6"],"dataset":{"id":"e5020d24-8652-456b-bf92-cd7d255a34c5","name":{"pool_name":"oxp_f6925045-363d-4e18-9bde-ee2987b33d21","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::6]:32345"},"services":[{"id":"e5020d24-8652-456b-bf92-cd7d255a34c5","details":{"type":"crucible","address":"[fd00:1122:3344:111::6]:32345"}}]},"root":"/pool/ext/6601065c-c172-4118-81b4-16adde7e9401/crypt/zone"},{"zone":{"id":"8f25f258-afd7-4351-83e4-24220ec0c251","zone_type":"crucible","addresses":["fd00:1122:3344:111::8"],"dataset":{"id":"8f25f258-afd7-4351-83e4-24220ec0c251","name":{"pool_name":"oxp_8e955f54-fbef-4021-9eec-457825468813","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::8]:32345"},"services":[{"id":"8f25f258-afd7-4351-83e4-24220ec0c251","details":{"type":"crucible","address":"[fd00:1122:3344:111::8]:32345"}}]},"root":"/pool/ext/6601065c-c172-4118-81b4-16adde7e9401/crypt/zone"},{"zone":{"id":"26aa50ec-d70a-47ea-85fc-e55c62a2e0c6","zone_type":"crucible","addresses":["fd00:1122:3344:111::5"],"dataset":{"id":"26aa50ec-d70a-47ea-85fc-e55c62a2e0c6","name":{"pool_name":"oxp_24d7e250-9fc6-459e-8155-30f8e8ccb28c","kind":{"type":"crucible"}},"servic
e_address":"[fd00:1122:3344:111::5]:32345"},"services":[{"id":"26aa50ec-d70a-47ea-85fc-e55c62a2e0c6","details":{"type":"crucible","address":"[fd00:1122:3344:111::5]:32345"}}]},"root":"/pool/ext/435d7a1b-2865-4d49-903f-a68f464ade4d/crypt/zone"},{"zone":{"id":"68dc212f-a96a-420f-8334-b11ee5d7cb95","zone_type":"crucible","addresses":["fd00:1122:3344:111::7"],"dataset":{"id":"68dc212f-a96a-420f-8334-b11ee5d7cb95","name":{"pool_name":"oxp_4353b00b-937e-4d07-aea6-014c57b6f12c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::7]:32345"},"services":[{"id":"68dc212f-a96a-420f-8334-b11ee5d7cb95","details":{"type":"crucible","address":"[fd00:1122:3344:111::7]:32345"}}]},"root":"/pool/ext/24d7e250-9fc6-459e-8155-30f8e8ccb28c/crypt/zone"},{"zone":{"id":"475140fa-a5dc-4ec1-876d-751c48adfc37","zone_type":"crucible","addresses":["fd00:1122:3344:111::a"],"dataset":{"id":"475140fa-a5dc-4ec1-876d-751c48adfc37","name":{"pool_name":"oxp_ee55b053-6874-4e20-86b5-2e105e64c068","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::a]:32345"},"services":[{"id":"475140fa-a5dc-4ec1-876d-751c48adfc37","details":{"type":"crucible","address":"[fd00:1122:3344:111::a]:32345"}}]},"root":"/pool/ext/ee55b053-6874-4e20-86b5-2e105e64c068/crypt/zone"},{"zone":{"id":"09d5a8c9-00db-4914-a2c6-7ae3d2da4558","zone_type":"crucible","addresses":["fd00:1122:3344:111::d"],"dataset":{"id":"09d5a8c9-00db-4914-a2c6-7ae3d2da4558","name":{"pool_name":"oxp_9ab5aba5-47dc-4bc4-8f6d-7cbe0f98a9a2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::d]:32345"},"services":[{"id":"09d5a8c9-00db-4914-a2c6-7ae3d2da4558","details":{"type":"crucible","address":"[fd00:1122:3344:111::d]:32345"}}]},"root":"/pool/ext/8e955f54-fbef-4021-9eec-457825468813/crypt/zone"},{"zone":{"id":"014f6a39-ad64-4f0a-9fef-01ca0d184cbf","zone_type":"crucible","addresses":["fd00:1122:3344:111::b"],"dataset":{"id":"014f6a39-ad64-4f0a-9fef-01ca0d184cbf","name":{"pool_name":"oxp_435d7a1b-2865-4d49-903f-a68f464ade4d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:111::b]:32345"},"services":[{"id":"014f6a39-ad64-4f0a-9fef-01ca0d184cbf","details":{"type":"crucible","address":"[fd00:1122:3344:111::b]:32345"}}]},"root":"/pool/ext/f6925045-363d-4e18-9bde-ee2987b33d21/crypt/zone"},{"zone":{"id":"aceaf348-ba07-4965-a543-63a800826fe8","zone_type":"ntp","addresses":["fd00:1122:3344:111::e"],"dataset":null,"services":[{"id":"aceaf348-ba07-4965-a543-63a800826fe8","details":{"type":"internal_ntp","address":"[fd00:1122:3344:111::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/8e955f54-fbef-4021-9eec-457825468813/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled15.json b/sled-agent/tests/old-service-ledgers/rack3-sled15.json new file mode 100644 index 0000000000..880f29409e --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled15.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"09a9ecee-1e7c-4819-b27a-73bb61099ce7","zone_type":"external_dns","addresses":["fd00:1122:3344:114::3"],"dataset":{"id":"09a9ecee-1e7c-4819-b27a-73bb61099ce7","name":{"pool_name":"oxp_b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e","kind":{"type":"external_dns"}},"service_address":"[fd00:1122:3344:114::3]:5353"},"services":[{"id":"09a9ecee-1e7c-4819-b27a-73bb61099ce7","details":{"type":"external_dns","http_address":"[fd00:1122:3344:114::3]:5353","dns_address":"45.154.216.33:53","nic":{"id":"400ca77b-7fee-47d5-8f17-1f4b9c729f27","kind":{"type":"service","id":"09a9ecee-1e7c-4819-b27a-73bb61099ce7"},"name":"external-dns-09a9ecee-1e7c-4819-b27a-73bb61099ce7","ip":"172.30.1.5","mac":"A8:40:25:FF:B7:C7","subnet":"172.30.1.0/24","vni":100,"primary":true,"slot":0}}}]},"root":"/pool/ext/9e878b1e-bf92-4155-8162-640851c2f5d5/crypt/zone"},{"zone":{"id":"1792e003-55f7-49b8-906c-4160db91bc23","zone_type":"crucible","addresses":["fd00:1122:3344:114::5"],"dataset":{"id":"1792e003-55f7-49b8-906c-4160db91bc23","name":{"pool_name":"oxp_7f3a760f-a4c0-456f-8a22-2d06ecac1022","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::5]:32345"},"services":[{"id":"1792e003-55f7-49b8-906c-4160db91bc23","details":{"type":"crucible","address":"[fd00:1122:3344:114::5]:32345"}}]},"root":"/pool/ext/76f09ad5-c96c-4748-bbe4-71afaea7bc5e/crypt/zone"},{"zone":{"id":"73bc7c0e-1034-449f-8920-4a1f418653ff","zone_type":"crucible","addresses":["fd00:1122:3344:114::8"],"dataset":{"id":"73bc7c0e-1034-449f-8920-4a1f418653ff","name":{"pool_name":"oxp_e87037be-1cdf-4c6e-a8a3-c27b830eaef9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::8]:32345"},"services":[{"id":"73bc7c0e-1034-449f-8920-4a1f418653ff","details":{"type":"crucible","address":"[fd00:1122:3344:114::8]:32345"}}]},"root":"/pool/ext/b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e/crypt/zone"},{"zone":{"id":"06dc6619-6251-4543-9a10-da1698af49d5","zone_type":"crucible","addresses":["fd00:1122:3344:114::9"],"dataset":{"id":"06dc6619-6251-4543-9a10-da1698af49d5","name":{"pool_name":"oxp_ee34c530-ce70-4f1a-8c97-d0ebb77ccfc8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::9]:32345"},"services":[{"id":"06dc6619-6251-4543-9a10-da1698af49d5","details":{"type":"crucible","address":"[fd00:1122:3344:114::9]:32345"}}]},"root":"/pool/ext/9e878b1e-bf92-4155-8162-640851c2f5d5/crypt/zone"},{"zone":{"id":"0d796c52-37ca-490d-b42f-dcc22fe5fd6b","zone_type":"crucible","addresses":["fd00:1122:3344:114::c"],"dataset":{"id":"0d796c52-37ca-490d-b42f-dcc22fe5fd6b","name":{"pool_name":"oxp_9ec2b893-d486-4b24-a077-1a297f9eb15f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::c]:32345"},"services":[{"id":"0d796c52-37ca-490d-b42f-dcc22fe5fd6b","details":{"type":"crucible","address":"[fd00:1122:3344:114::c]:32345"}}]},"root":"/pool/ext/9e72c0e2-4895-4791-b606-2f18e432fb69/crypt/zone"},{"zone":{"id":"91d0011f-de44-4823-bc26-a447affa39bc","zone_type":"crucible","addresses":["fd00:1122:3344:114::a"],"dataset":{"id":"91d0011f-de44-4823-bc26-a447affa39bc","name":{"pool_name":"oxp_85e81a14-031d-4a63-a91f-981c64e91f60","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::a]:32345"},"services":[{"id":"91d0011f-de44-4823-bc26-a447affa39bc","details":{"type":"crucible","address":"[fd00:1122:3344:114::a]:32345"}}]},"root":"/pool/ext/b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e/crypt/zone"},{"zone":{"id":"0c44a2f1-559a-459c-9931-e0e7964d41c6","zone_type":"crucible","addresses":["fd00:1122:3344:114::b"],"dataset":{"id
":"0c44a2f1-559a-459c-9931-e0e7964d41c6","name":{"pool_name":"oxp_76f09ad5-c96c-4748-bbe4-71afaea7bc5e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::b]:32345"},"services":[{"id":"0c44a2f1-559a-459c-9931-e0e7964d41c6","details":{"type":"crucible","address":"[fd00:1122:3344:114::b]:32345"}}]},"root":"/pool/ext/e87037be-1cdf-4c6e-a8a3-c27b830eaef9/crypt/zone"},{"zone":{"id":"ea363819-96f6-4fb6-a203-f18414f1c60e","zone_type":"crucible","addresses":["fd00:1122:3344:114::4"],"dataset":{"id":"ea363819-96f6-4fb6-a203-f18414f1c60e","name":{"pool_name":"oxp_b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::4]:32345"},"services":[{"id":"ea363819-96f6-4fb6-a203-f18414f1c60e","details":{"type":"crucible","address":"[fd00:1122:3344:114::4]:32345"}}]},"root":"/pool/ext/b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e/crypt/zone"},{"zone":{"id":"21592c39-da6b-4527-842e-edeeceffafa1","zone_type":"crucible","addresses":["fd00:1122:3344:114::6"],"dataset":{"id":"21592c39-da6b-4527-842e-edeeceffafa1","name":{"pool_name":"oxp_9e72c0e2-4895-4791-b606-2f18e432fb69","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::6]:32345"},"services":[{"id":"21592c39-da6b-4527-842e-edeeceffafa1","details":{"type":"crucible","address":"[fd00:1122:3344:114::6]:32345"}}]},"root":"/pool/ext/7aff8429-b65d-4a53-a796-7221ac7581a9/crypt/zone"},{"zone":{"id":"f33b1263-f1b2-43a6-a8aa-5f8570dd4e72","zone_type":"crucible","addresses":["fd00:1122:3344:114::7"],"dataset":{"id":"f33b1263-f1b2-43a6-a8aa-5f8570dd4e72","name":{"pool_name":"oxp_9e878b1e-bf92-4155-8162-640851c2f5d5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::7]:32345"},"services":[{"id":"f33b1263-f1b2-43a6-a8aa-5f8570dd4e72","details":{"type":"crucible","address":"[fd00:1122:3344:114::7]:32345"}}]},"root":"/pool/ext/7f3a760f-a4c0-456f-8a22-2d06ecac1022/crypt/zone"},{"zone":{"id":"6f42b469-5a36-4048-a152-e884f7e8a206","zone_type":"crucible","addresses":["fd00:1122:3344:114::d"],"dataset":{"id":"6f42b469-5a36-4048-a152-e884f7e8a206","name":{"pool_name":"oxp_7aff8429-b65d-4a53-a796-7221ac7581a9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:114::d]:32345"},"services":[{"id":"6f42b469-5a36-4048-a152-e884f7e8a206","details":{"type":"crucible","address":"[fd00:1122:3344:114::d]:32345"}}]},"root":"/pool/ext/9e72c0e2-4895-4791-b606-2f18e432fb69/crypt/zone"},{"zone":{"id":"ad77d594-8f78-4d33-a5e4-59887060178e","zone_type":"ntp","addresses":["fd00:1122:3344:114::e"],"dataset":null,"services":[{"id":"ad77d594-8f78-4d33-a5e4-59887060178e","details":{"type":"internal_ntp","address":"[fd00:1122:3344:114::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/85e81a14-031d-4a63-a91f-981c64e91f60/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled16.json b/sled-agent/tests/old-service-ledgers/rack3-sled16.json new file mode 100644 index 0000000000..3a1cbeb411 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled16.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"dcb9a4ae-2c89-4a74-905b-b7936ff49c19","zone_type":"crucible","addresses":["fd00:1122:3344:11f::9"],"dataset":{"id":"dcb9a4ae-2c89-4a74-905b-b7936ff49c19","name":{"pool_name":"oxp_af509039-d27f-4095-bc9d-cecbc5c606db","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::9]:32345"},"services":[{"id":"dcb9a4ae-2c89-4a74-905b-b7936ff49c19","details":{"type":"crucible","address":"[fd00:1122:3344:11f::9]:32345"}}]},"root":"/pool/ext/44ee0fb4-6034-44e8-b3de-b3a44457ffca/crypt/zone"},{"zone":{"id":"dbd46f71-ec39-4b72-a77d-9d281ccb37e0","zone_type":"crucible","addresses":["fd00:1122:3344:11f::b"],"dataset":{"id":"dbd46f71-ec39-4b72-a77d-9d281ccb37e0","name":{"pool_name":"oxp_44ee0fb4-6034-44e8-b3de-b3a44457ffca","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::b]:32345"},"services":[{"id":"dbd46f71-ec39-4b72-a77d-9d281ccb37e0","details":{"type":"crucible","address":"[fd00:1122:3344:11f::b]:32345"}}]},"root":"/pool/ext/5e32c0a3-1210-402b-91fb-256946eeac2b/crypt/zone"},{"zone":{"id":"a1f30569-a5c6-4a6d-922e-241966aea142","zone_type":"crucible","addresses":["fd00:1122:3344:11f::6"],"dataset":{"id":"a1f30569-a5c6-4a6d-922e-241966aea142","name":{"pool_name":"oxp_d2133e8b-51cc-455e-89d0-5454fd4fe109","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::6]:32345"},"services":[{"id":"a1f30569-a5c6-4a6d-922e-241966aea142","details":{"type":"crucible","address":"[fd00:1122:3344:11f::6]:32345"}}]},"root":"/pool/ext/3f57835b-1469-499a-8757-7cc56acc5d49/crypt/zone"},{"zone":{"id":"a33e25ae-4e41-40f4-843d-3d12f62d8cb6","zone_type":"crucible","addresses":["fd00:1122:3344:11f::8"],"dataset":{"id":"a33e25ae-4e41-40f4-843d-3d12f62d8cb6","name":{"pool_name":"oxp_c8e4a7f4-1ae6-4683-8397-ea53475a53e8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::8]:32345"},"services":[{"id":"a33e25ae-4e41-40f4-843d-3d12f62d8cb6","details":{"type":"crucible","address":"[fd00:1122:3344:11f::8]:32345"}}]},"root":"/pool/ext/5e32c0a3-1210-402b-91fb-256946eeac2b/crypt/zone"},{"zone":{"id":"65ed75c2-2d80-4de5-a6f6-adfa6516c7cf","zone_type":"crucible","addresses":["fd00:1122:3344:11f::c"],"dataset":{"id":"65ed75c2-2d80-4de5-a6f6-adfa6516c7cf","name":{"pool_name":"oxp_3f57835b-1469-499a-8757-7cc56acc5d49","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::c]:32345"},"services":[{"id":"65ed75c2-2d80-4de5-a6f6-adfa6516c7cf","details":{"type":"crucible","address":"[fd00:1122:3344:11f::c]:32345"}}]},"root":"/pool/ext/cd8cd75c-632b-4527-889a-7ca0c080fe2c/crypt/zone"},{"zone":{"id":"bc6ccf18-6b9b-4687-8b70-c7917d972ae0","zone_type":"crucible","addresses":["fd00:1122:3344:11f::a"],"dataset":{"id":"bc6ccf18-6b9b-4687-8b70-c7917d972ae0","name":{"pool_name":"oxp_cd8cd75c-632b-4527-889a-7ca0c080fe2c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::a]:32345"},"services":[{"id":"bc6ccf18-6b9b-4687-8b70-c7917d972ae0","details":{"type":"crucible","address":"[fd00:1122:3344:11f::a]:32345"}}]},"root":"/pool/ext/5e32c0a3-1210-402b-91fb-256946eeac2b/crypt/zone"},{"zone":{"id":"06233bfe-a857-4819-aefe-212af9eeb90f","zone_type":"crucible","addresses":["fd00:1122:3344:11f::5"],"dataset":{"id":"06233bfe-a857-4819-aefe-212af9eeb90f","name":{"pool_name":"oxp_c8a1aaf1-d27c-45fd-9f8d-80ac6bf6865d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::5]:32345"},"services":[{"id":"06233bfe-a857-4819-aefe-212af9eeb90f","details":{"type":"crucible","address":"[fd00:1122:3344:11f::5]:32345"}}]},"root":"/pool/ext
/af509039-d27f-4095-bc9d-cecbc5c606db/crypt/zone"},{"zone":{"id":"0bbfef71-9eae-43b6-b5e7-0060ce9269dd","zone_type":"crucible","addresses":["fd00:1122:3344:11f::4"],"dataset":{"id":"0bbfef71-9eae-43b6-b5e7-0060ce9269dd","name":{"pool_name":"oxp_5e32c0a3-1210-402b-91fb-256946eeac2b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::4]:32345"},"services":[{"id":"0bbfef71-9eae-43b6-b5e7-0060ce9269dd","details":{"type":"crucible","address":"[fd00:1122:3344:11f::4]:32345"}}]},"root":"/pool/ext/af509039-d27f-4095-bc9d-cecbc5c606db/crypt/zone"},{"zone":{"id":"550e10ee-24d1-444f-80be-2744dd321e0f","zone_type":"crucible","addresses":["fd00:1122:3344:11f::7"],"dataset":{"id":"550e10ee-24d1-444f-80be-2744dd321e0f","name":{"pool_name":"oxp_f437ce0e-eb45-4be8-b1fe-33ed2656eb01","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11f::7]:32345"},"services":[{"id":"550e10ee-24d1-444f-80be-2744dd321e0f","details":{"type":"crucible","address":"[fd00:1122:3344:11f::7]:32345"}}]},"root":"/pool/ext/44ee0fb4-6034-44e8-b3de-b3a44457ffca/crypt/zone"},{"zone":{"id":"86d768f3-ece2-4956-983f-999bdb23a983","zone_type":"cockroach_db","addresses":["fd00:1122:3344:11f::3"],"dataset":{"id":"86d768f3-ece2-4956-983f-999bdb23a983","name":{"pool_name":"oxp_5e32c0a3-1210-402b-91fb-256946eeac2b","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:11f::3]:32221"},"services":[{"id":"86d768f3-ece2-4956-983f-999bdb23a983","details":{"type":"cockroach_db","address":"[fd00:1122:3344:11f::3]:32221"}}]},"root":"/pool/ext/c8a1aaf1-d27c-45fd-9f8d-80ac6bf6865d/crypt/zone"},{"zone":{"id":"2f358812-f72c-4838-a5ea-7d78d0954be0","zone_type":"ntp","addresses":["fd00:1122:3344:11f::d"],"dataset":null,"services":[{"id":"2f358812-f72c-4838-a5ea-7d78d0954be0","details":{"type":"internal_ntp","address":"[fd00:1122:3344:11f::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/f437ce0e-eb45-4be8-b1fe-33ed2656eb01/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled17.json b/sled-agent/tests/old-service-ledgers/rack3-sled17.json new file mode 100644 index 0000000000..4063fed2e2 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled17.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"525a19a2-d4ac-418d-bdcf-2ce26e7abe70","zone_type":"crucible","addresses":["fd00:1122:3344:107::a"],"dataset":{"id":"525a19a2-d4ac-418d-bdcf-2ce26e7abe70","name":{"pool_name":"oxp_cb774d2f-ff86-4fd7-866b-17a6b10e61f0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::a]:32345"},"services":[{"id":"525a19a2-d4ac-418d-bdcf-2ce26e7abe70","details":{"type":"crucible","address":"[fd00:1122:3344:107::a]:32345"}}]},"root":"/pool/ext/e17b68b5-f50c-4fc3-b55a-80d284c6c32d/crypt/zone"},{"zone":{"id":"7af188e1-6175-4769-9e4f-2ca7a98b76f6","zone_type":"crucible","addresses":["fd00:1122:3344:107::4"],"dataset":{"id":"7af188e1-6175-4769-9e4f-2ca7a98b76f6","name":{"pool_name":"oxp_0cbbcf22-770d-4e75-9148-e6109b129093","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::4]:32345"},"services":[{"id":"7af188e1-6175-4769-9e4f-2ca7a98b76f6","details":{"type":"crucible","address":"[fd00:1122:3344:107::4]:32345"}}]},"root":"/pool/ext/b998e8df-ea69-4bdd-84cb-b7f17075b060/crypt/zone"},{"zone":{"id":"2544540f-6ffc-46c0-84bf-f42a110c02d7","zone_type":"crucible","addresses":["fd00:1122:3344:107::6"],"dataset":{"id":"2544540f-6ffc-46c0-84bf-f42a110c02d7","name":{"pool_name":"oxp_e17b68b5-f50c-4fc3-b55a-80d284c6c32d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::6]:32345"},"services":[{"id":"2544540f-6ffc-46c0-84bf-f42a110c02d7","details":{"type":"crucible","address":"[fd00:1122:3344:107::6]:32345"}}]},"root":"/pool/ext/521fa477-4d83-49a8-a5cf-c267b7f0c409/crypt/zone"},{"zone":{"id":"cfc20f72-cac2-4681-a6d8-e5a0accafbb7","zone_type":"crucible","addresses":["fd00:1122:3344:107::7"],"dataset":{"id":"cfc20f72-cac2-4681-a6d8-e5a0accafbb7","name":{"pool_name":"oxp_b998e8df-ea69-4bdd-84cb-b7f17075b060","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::7]:32345"},"services":[{"id":"cfc20f72-cac2-4681-a6d8-e5a0accafbb7","details":{"type":"crucible","address":"[fd00:1122:3344:107::7]:32345"}}]},"root":"/pool/ext/0cbbcf22-770d-4e75-9148-e6109b129093/crypt/zone"},{"zone":{"id":"e24be791-5773-425e-a3df-e35ca81570c7","zone_type":"crucible","addresses":["fd00:1122:3344:107::9"],"dataset":{"id":"e24be791-5773-425e-a3df-e35ca81570c7","name":{"pool_name":"oxp_7849c221-dc7f-43ac-ac47-bc51864e083b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::9]:32345"},"services":[{"id":"e24be791-5773-425e-a3df-e35ca81570c7","details":{"type":"crucible","address":"[fd00:1122:3344:107::9]:32345"}}]},"root":"/pool/ext/7849c221-dc7f-43ac-ac47-bc51864e083b/crypt/zone"},{"zone":{"id":"170856ee-21cf-4780-8903-175d558bc7cc","zone_type":"crucible","addresses":["fd00:1122:3344:107::3"],"dataset":{"id":"170856ee-21cf-4780-8903-175d558bc7cc","name":{"pool_name":"oxp_618e21e5-77d4-40ba-9f8e-7960e9ad92e2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::3]:32345"},"services":[{"id":"170856ee-21cf-4780-8903-175d558bc7cc","details":{"type":"crucible","address":"[fd00:1122:3344:107::3]:32345"}}]},"root":"/pool/ext/aa7a37fb-2f03-4d5c-916b-db3a4fc269ac/crypt/zone"},{"zone":{"id":"604278ff-525a-4d41-82ff-07aef3174d38","zone_type":"crucible","addresses":["fd00:1122:3344:107::5"],"dataset":{"id":"604278ff-525a-4d41-82ff-07aef3174d38","name":{"pool_name":"oxp_521fa477-4d83-49a8-a5cf-c267b7f0c409","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::5]:32345"},"services":[{"id":"604278ff-525a-4d41-82ff-07aef3174d38","details":{"type":"crucible","address":"[fd00:1122:3344:107::5]:32345"}}]},"root":"/pool/ext
/0cbbcf22-770d-4e75-9148-e6109b129093/crypt/zone"},{"zone":{"id":"d0d4fcc0-6ed0-410a-99c7-5daf34014421","zone_type":"crucible","addresses":["fd00:1122:3344:107::b"],"dataset":{"id":"d0d4fcc0-6ed0-410a-99c7-5daf34014421","name":{"pool_name":"oxp_aa7a37fb-2f03-4d5c-916b-db3a4fc269ac","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::b]:32345"},"services":[{"id":"d0d4fcc0-6ed0-410a-99c7-5daf34014421","details":{"type":"crucible","address":"[fd00:1122:3344:107::b]:32345"}}]},"root":"/pool/ext/aa7a37fb-2f03-4d5c-916b-db3a4fc269ac/crypt/zone"},{"zone":{"id":"c935df7b-2629-48ee-bc10-20508301905d","zone_type":"crucible","addresses":["fd00:1122:3344:107::c"],"dataset":{"id":"c935df7b-2629-48ee-bc10-20508301905d","name":{"pool_name":"oxp_793fd018-5fdc-4e54-9c45-f8023fa3ea18","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::c]:32345"},"services":[{"id":"c935df7b-2629-48ee-bc10-20508301905d","details":{"type":"crucible","address":"[fd00:1122:3344:107::c]:32345"}}]},"root":"/pool/ext/7849c221-dc7f-43ac-ac47-bc51864e083b/crypt/zone"},{"zone":{"id":"4ba5f3b6-8be5-4a85-bc57-a5e3b0b867d8","zone_type":"crucible","addresses":["fd00:1122:3344:107::8"],"dataset":{"id":"4ba5f3b6-8be5-4a85-bc57-a5e3b0b867d8","name":{"pool_name":"oxp_e80e7996-c572-481e-8c22-61c16c6e47f4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:107::8]:32345"},"services":[{"id":"4ba5f3b6-8be5-4a85-bc57-a5e3b0b867d8","details":{"type":"crucible","address":"[fd00:1122:3344:107::8]:32345"}}]},"root":"/pool/ext/e17b68b5-f50c-4fc3-b55a-80d284c6c32d/crypt/zone"},{"zone":{"id":"395c9d6e-3bd0-445e-9269-46c3260edb83","zone_type":"ntp","addresses":["fd00:1122:3344:107::d"],"dataset":null,"services":[{"id":"395c9d6e-3bd0-445e-9269-46c3260edb83","details":{"type":"internal_ntp","address":"[fd00:1122:3344:107::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/0cbbcf22-770d-4e75-9148-e6109b129093/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled18.json b/sled-agent/tests/old-service-ledgers/rack3-sled18.json new file mode 100644 index 0000000000..f47e912424 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled18.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"c7096dd4-e429-4a6f-9725-041a77ef2513","zone_type":"crucible","addresses":["fd00:1122:3344:11a::6"],"dataset":{"id":"c7096dd4-e429-4a6f-9725-041a77ef2513","name":{"pool_name":"oxp_dcf62af6-c0f9-4eb5-9b23-9424ef8f3d32","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::6]:32345"},"services":[{"id":"c7096dd4-e429-4a6f-9725-041a77ef2513","details":{"type":"crucible","address":"[fd00:1122:3344:11a::6]:32345"}}]},"root":"/pool/ext/b869e463-c8b9-4c12-a6b9-13175b3896dd/crypt/zone"},{"zone":{"id":"09dd367f-b32f-43f3-aa53-11ccec1cd0c9","zone_type":"crucible","addresses":["fd00:1122:3344:11a::9"],"dataset":{"id":"09dd367f-b32f-43f3-aa53-11ccec1cd0c9","name":{"pool_name":"oxp_d7d00317-42c7-4d1e-a04c-85491fb230cd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::9]:32345"},"services":[{"id":"09dd367f-b32f-43f3-aa53-11ccec1cd0c9","details":{"type":"crucible","address":"[fd00:1122:3344:11a::9]:32345"}}]},"root":"/pool/ext/d7d00317-42c7-4d1e-a04c-85491fb230cd/crypt/zone"},{"zone":{"id":"fb2f85f1-05b3-432f-9bb5-63fb27a762b1","zone_type":"crucible","addresses":["fd00:1122:3344:11a::5"],"dataset":{"id":"fb2f85f1-05b3-432f-9bb5-63fb27a762b1","name":{"pool_name":"oxp_db4a9949-68da-4c1c-9a1c-49083eba14fe","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::5]:32345"},"services":[{"id":"fb2f85f1-05b3-432f-9bb5-63fb27a762b1","details":{"type":"crucible","address":"[fd00:1122:3344:11a::5]:32345"}}]},"root":"/pool/ext/db4a9949-68da-4c1c-9a1c-49083eba14fe/crypt/zone"},{"zone":{"id":"5b89425e-69e4-4305-8f33-dc5768a1849e","zone_type":"crucible","addresses":["fd00:1122:3344:11a::a"],"dataset":{"id":"5b89425e-69e4-4305-8f33-dc5768a1849e","name":{"pool_name":"oxp_64a1bad7-d1b1-4e39-a3f3-9b8d73c4709e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::a]:32345"},"services":[{"id":"5b89425e-69e4-4305-8f33-dc5768a1849e","details":{"type":"crucible","address":"[fd00:1122:3344:11a::a]:32345"}}]},"root":"/pool/ext/64a1bad7-d1b1-4e39-a3f3-9b8d73c4709e/crypt/zone"},{"zone":{"id":"a5156db4-273a-4f8b-b8d8-df77062a6c63","zone_type":"crucible","addresses":["fd00:1122:3344:11a::4"],"dataset":{"id":"a5156db4-273a-4f8b-b8d8-df77062a6c63","name":{"pool_name":"oxp_b869e463-c8b9-4c12-a6b9-13175b3896dd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::4]:32345"},"services":[{"id":"a5156db4-273a-4f8b-b8d8-df77062a6c63","details":{"type":"crucible","address":"[fd00:1122:3344:11a::4]:32345"}}]},"root":"/pool/ext/dcf62af6-c0f9-4eb5-9b23-9424ef8f3d32/crypt/zone"},{"zone":{"id":"1f2d2f86-b69b-4130-bb9b-e62ba0cb6802","zone_type":"crucible","addresses":["fd00:1122:3344:11a::b"],"dataset":{"id":"1f2d2f86-b69b-4130-bb9b-e62ba0cb6802","name":{"pool_name":"oxp_153ffee4-5d7a-4786-ad33-d5567b434fe0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::b]:32345"},"services":[{"id":"1f2d2f86-b69b-4130-bb9b-e62ba0cb6802","details":{"type":"crucible","address":"[fd00:1122:3344:11a::b]:32345"}}]},"root":"/pool/ext/174a067d-1c5a-49f7-a29f-1e62ab1c3796/crypt/zone"},{"zone":{"id":"1e249cc9-52e7-4d66-b713-8ace1392e991","zone_type":"crucible","addresses":["fd00:1122:3344:11a::7"],"dataset":{"id":"1e249cc9-52e7-4d66-b713-8ace1392e991","name":{"pool_name":"oxp_04b6215e-9651-4a3c-ba1b-b8a1e67b3d89","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::7]:32345"},"services":[{"id":"1e249cc9-52e7-4d66-b713-8ace1392e991","details":{"type":"crucible","address":"[fd00:1122:3344:11a::7]:32345"}}]},"root":"/pool/ext
/db4a9949-68da-4c1c-9a1c-49083eba14fe/crypt/zone"},{"zone":{"id":"eb779538-2b1b-4d1d-8c7e-b15f04db6e53","zone_type":"crucible","addresses":["fd00:1122:3344:11a::3"],"dataset":{"id":"eb779538-2b1b-4d1d-8c7e-b15f04db6e53","name":{"pool_name":"oxp_aacb8524-3562-4f97-a616-9023230d6efa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::3]:32345"},"services":[{"id":"eb779538-2b1b-4d1d-8c7e-b15f04db6e53","details":{"type":"crucible","address":"[fd00:1122:3344:11a::3]:32345"}}]},"root":"/pool/ext/174a067d-1c5a-49f7-a29f-1e62ab1c3796/crypt/zone"},{"zone":{"id":"b575d52d-be7d-46af-814b-91e6d18f3464","zone_type":"crucible","addresses":["fd00:1122:3344:11a::8"],"dataset":{"id":"b575d52d-be7d-46af-814b-91e6d18f3464","name":{"pool_name":"oxp_174a067d-1c5a-49f7-a29f-1e62ab1c3796","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::8]:32345"},"services":[{"id":"b575d52d-be7d-46af-814b-91e6d18f3464","details":{"type":"crucible","address":"[fd00:1122:3344:11a::8]:32345"}}]},"root":"/pool/ext/64a1bad7-d1b1-4e39-a3f3-9b8d73c4709e/crypt/zone"},{"zone":{"id":"274200bc-eac7-47d7-8a57-4b7be794caba","zone_type":"crucible","addresses":["fd00:1122:3344:11a::c"],"dataset":{"id":"274200bc-eac7-47d7-8a57-4b7be794caba","name":{"pool_name":"oxp_2e7644e4-7d46-42bf-8e7a-9c3f39085b3f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11a::c]:32345"},"services":[{"id":"274200bc-eac7-47d7-8a57-4b7be794caba","details":{"type":"crucible","address":"[fd00:1122:3344:11a::c]:32345"}}]},"root":"/pool/ext/2e7644e4-7d46-42bf-8e7a-9c3f39085b3f/crypt/zone"},{"zone":{"id":"bc20ba3a-df62-4a62-97c2-75b5653f84b4","zone_type":"ntp","addresses":["fd00:1122:3344:11a::d"],"dataset":null,"services":[{"id":"bc20ba3a-df62-4a62-97c2-75b5653f84b4","details":{"type":"internal_ntp","address":"[fd00:1122:3344:11a::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/04b6215e-9651-4a3c-ba1b-b8a1e67b3d89/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled19.json b/sled-agent/tests/old-service-ledgers/rack3-sled19.json new file mode 100644 index 0000000000..c450320a73 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled19.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"9c73abb9-edb8-4aa2-835b-c25ebe4466d9","zone_type":"crucible","addresses":["fd00:1122:3344:109::7"],"dataset":{"id":"9c73abb9-edb8-4aa2-835b-c25ebe4466d9","name":{"pool_name":"oxp_b7a3032f-7b8c-4a6a-9fa2-e5773bfdbc94","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::7]:32345"},"services":[{"id":"9c73abb9-edb8-4aa2-835b-c25ebe4466d9","details":{"type":"crucible","address":"[fd00:1122:3344:109::7]:32345"}}]},"root":"/pool/ext/46d21f3d-23be-4361-b5c5-9d0f6ece5b8c/crypt/zone"},{"zone":{"id":"ca576bda-cbdd-4bb9-9d75-ce06d569e926","zone_type":"crucible","addresses":["fd00:1122:3344:109::a"],"dataset":{"id":"ca576bda-cbdd-4bb9-9d75-ce06d569e926","name":{"pool_name":"oxp_863c4bc4-9c7e-453c-99d8-a3d509f49f3e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::a]:32345"},"services":[{"id":"ca576bda-cbdd-4bb9-9d75-ce06d569e926","details":{"type":"crucible","address":"[fd00:1122:3344:109::a]:32345"}}]},"root":"/pool/ext/7e67cb32-0c00-4090-9647-eb7bae75deeb/crypt/zone"},{"zone":{"id":"f010978d-346e-49cd-b265-7607a25685f9","zone_type":"crucible","addresses":["fd00:1122:3344:109::c"],"dataset":{"id":"f010978d-346e-49cd-b265-7607a25685f9","name":{"pool_name":"oxp_9bc1dab8-2d2a-4f92-bdfb-94ebca7881f1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::c]:32345"},"services":[{"id":"f010978d-346e-49cd-b265-7607a25685f9","details":{"type":"crucible","address":"[fd00:1122:3344:109::c]:32345"}}]},"root":"/pool/ext/9bc1dab8-2d2a-4f92-bdfb-94ebca7881f1/crypt/zone"},{"zone":{"id":"daff4162-cc81-4586-a457-91d767b8f1d9","zone_type":"crucible","addresses":["fd00:1122:3344:109::6"],"dataset":{"id":"daff4162-cc81-4586-a457-91d767b8f1d9","name":{"pool_name":"oxp_b9b5b50c-e823-41ae-9585-01b818883521","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::6]:32345"},"services":[{"id":"daff4162-cc81-4586-a457-91d767b8f1d9","details":{"type":"crucible","address":"[fd00:1122:3344:109::6]:32345"}}]},"root":"/pool/ext/de682b18-afaf-4d53-b62e-934f6bd4a1f8/crypt/zone"},{"zone":{"id":"9f300d3d-e698-4cc8-be4c-1f81ac8c927f","zone_type":"crucible","addresses":["fd00:1122:3344:109::d"],"dataset":{"id":"9f300d3d-e698-4cc8-be4c-1f81ac8c927f","name":{"pool_name":"oxp_f1d82c22-ad7d-4cda-9ab0-8f5f496d90ce","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::d]:32345"},"services":[{"id":"9f300d3d-e698-4cc8-be4c-1f81ac8c927f","details":{"type":"crucible","address":"[fd00:1122:3344:109::d]:32345"}}]},"root":"/pool/ext/de682b18-afaf-4d53-b62e-934f6bd4a1f8/crypt/zone"},{"zone":{"id":"8db7c7be-da40-4a1c-9681-4d02606a7eb7","zone_type":"crucible","addresses":["fd00:1122:3344:109::9"],"dataset":{"id":"8db7c7be-da40-4a1c-9681-4d02606a7eb7","name":{"pool_name":"oxp_46d21f3d-23be-4361-b5c5-9d0f6ece5b8c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::9]:32345"},"services":[{"id":"8db7c7be-da40-4a1c-9681-4d02606a7eb7","details":{"type":"crucible","address":"[fd00:1122:3344:109::9]:32345"}}]},"root":"/pool/ext/b7a3032f-7b8c-4a6a-9fa2-e5773bfdbc94/crypt/zone"},{"zone":{"id":"b990911b-805a-4f9d-bd83-e977f5b19a35","zone_type":"crucible","addresses":["fd00:1122:3344:109::4"],"dataset":{"id":"b990911b-805a-4f9d-bd83-e977f5b19a35","name":{"pool_name":"oxp_7e67cb32-0c00-4090-9647-eb7bae75deeb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::4]:32345"},"services":[{"id":"b990911b-805a-4f9d-bd83-e977f5b19a35","details":{"type":"crucible","address":"[fd00:1122:3344:109::4]:32345"}}]},"root":"/pool/ext
/de682b18-afaf-4d53-b62e-934f6bd4a1f8/crypt/zone"},{"zone":{"id":"c99392f5-8f30-41ac-9eeb-12d7f4b707f1","zone_type":"crucible","addresses":["fd00:1122:3344:109::b"],"dataset":{"id":"c99392f5-8f30-41ac-9eeb-12d7f4b707f1","name":{"pool_name":"oxp_de682b18-afaf-4d53-b62e-934f6bd4a1f8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::b]:32345"},"services":[{"id":"c99392f5-8f30-41ac-9eeb-12d7f4b707f1","details":{"type":"crucible","address":"[fd00:1122:3344:109::b]:32345"}}]},"root":"/pool/ext/46d21f3d-23be-4361-b5c5-9d0f6ece5b8c/crypt/zone"},{"zone":{"id":"7f6cb339-9eb1-4866-8a4f-383bad25b36f","zone_type":"crucible","addresses":["fd00:1122:3344:109::5"],"dataset":{"id":"7f6cb339-9eb1-4866-8a4f-383bad25b36f","name":{"pool_name":"oxp_458cbfa3-3752-415d-8a3b-fb64e88468e1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::5]:32345"},"services":[{"id":"7f6cb339-9eb1-4866-8a4f-383bad25b36f","details":{"type":"crucible","address":"[fd00:1122:3344:109::5]:32345"}}]},"root":"/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone"},{"zone":{"id":"11946372-f253-4648-b00c-c7874a7b2888","zone_type":"crucible","addresses":["fd00:1122:3344:109::8"],"dataset":{"id":"11946372-f253-4648-b00c-c7874a7b2888","name":{"pool_name":"oxp_d73332f5-b2a5-46c0-94cf-c5c5712abfe8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:109::8]:32345"},"services":[{"id":"11946372-f253-4648-b00c-c7874a7b2888","details":{"type":"crucible","address":"[fd00:1122:3344:109::8]:32345"}}]},"root":"/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone"},{"zone":{"id":"58ece9e1-387f-4d2f-a42f-69cd34f9f380","zone_type":"cockroach_db","addresses":["fd00:1122:3344:109::3"],"dataset":{"id":"58ece9e1-387f-4d2f-a42f-69cd34f9f380","name":{"pool_name":"oxp_7e67cb32-0c00-4090-9647-eb7bae75deeb","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:109::3]:32221"},"services":[{"id":"58ece9e1-387f-4d2f-a42f-69cd34f9f380","details":{"type":"cockroach_db","address":"[fd00:1122:3344:109::3]:32221"}}]},"root":"/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone"},{"zone":{"id":"f016a25a-deb5-4f20-bdb0-2425c00d41a6","zone_type":"ntp","addresses":["fd00:1122:3344:109::e"],"dataset":null,"services":[{"id":"f016a25a-deb5-4f20-bdb0-2425c00d41a6","details":{"type":"internal_ntp","address":"[fd00:1122:3344:109::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled2.json b/sled-agent/tests/old-service-ledgers/rack3-sled2.json new file mode 100644 index 0000000000..6c420c989d --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled2.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"dd799dd4-03f9-451d-85e2-844155753a03","zone_type":"crucible","addresses":["fd00:1122:3344:10a::7"],"dataset":{"id":"dd799dd4-03f9-451d-85e2-844155753a03","name":{"pool_name":"oxp_7dcf3acc-bde9-4306-bb46-4c6a6cbbb7ba","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::7]:32345"},"services":[{"id":"dd799dd4-03f9-451d-85e2-844155753a03","details":{"type":"crucible","address":"[fd00:1122:3344:10a::7]:32345"}}]},"root":"/pool/ext/7dcf3acc-bde9-4306-bb46-4c6a6cbbb7ba/crypt/zone"},{"zone":{"id":"dbf9346d-b46d-4402-bb44-92ce20fb5290","zone_type":"crucible","addresses":["fd00:1122:3344:10a::9"],"dataset":{"id":"dbf9346d-b46d-4402-bb44-92ce20fb5290","name":{"pool_name":"oxp_9275d50f-da2c-4f84-9775-598a364309ad","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::9]:32345"},"services":[{"id":"dbf9346d-b46d-4402-bb44-92ce20fb5290","details":{"type":"crucible","address":"[fd00:1122:3344:10a::9]:32345"}}]},"root":"/pool/ext/d83e36ef-dd7a-4cc2-be19-379b1114c031/crypt/zone"},{"zone":{"id":"9a55ebdd-eeef-4954-b0a1-e32b04837f14","zone_type":"crucible","addresses":["fd00:1122:3344:10a::4"],"dataset":{"id":"9a55ebdd-eeef-4954-b0a1-e32b04837f14","name":{"pool_name":"oxp_7f30f77e-5998-4676-a226-b433b5940e77","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::4]:32345"},"services":[{"id":"9a55ebdd-eeef-4954-b0a1-e32b04837f14","details":{"type":"crucible","address":"[fd00:1122:3344:10a::4]:32345"}}]},"root":"/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone"},{"zone":{"id":"bc2935f8-e4fa-4015-968e-f90985533a6a","zone_type":"crucible","addresses":["fd00:1122:3344:10a::6"],"dataset":{"id":"bc2935f8-e4fa-4015-968e-f90985533a6a","name":{"pool_name":"oxp_022c9d58-e91f-480d-bda6-0cf32ce3b1f5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::6]:32345"},"services":[{"id":"bc2935f8-e4fa-4015-968e-f90985533a6a","details":{"type":"crucible","address":"[fd00:1122:3344:10a::6]:32345"}}]},"root":"/pool/ext/c395dcc3-6ece-4b3f-b143-e111a54ef7da/crypt/zone"},{"zone":{"id":"63f8c861-fa1d-4121-92d9-7efa5ef7f5a0","zone_type":"crucible","addresses":["fd00:1122:3344:10a::a"],"dataset":{"id":"63f8c861-fa1d-4121-92d9-7efa5ef7f5a0","name":{"pool_name":"oxp_3c805784-f403-4d01-9eb0-4f77d0821980","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::a]:32345"},"services":[{"id":"63f8c861-fa1d-4121-92d9-7efa5ef7f5a0","details":{"type":"crucible","address":"[fd00:1122:3344:10a::a]:32345"}}]},"root":"/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone"},{"zone":{"id":"4996dcf9-78de-4f69-94fa-c09cc86a8d3c","zone_type":"crucible","addresses":["fd00:1122:3344:10a::b"],"dataset":{"id":"4996dcf9-78de-4f69-94fa-c09cc86a8d3c","name":{"pool_name":"oxp_f9fe9ce6-be0d-4974-bc30-78a8f1330496","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::b]:32345"},"services":[{"id":"4996dcf9-78de-4f69-94fa-c09cc86a8d3c","details":{"type":"crucible","address":"[fd00:1122:3344:10a::b]:32345"}}]},"root":"/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone"},{"zone":{"id":"36b9a4bf-7b30-4fe7-903d-3b722c79fa86","zone_type":"crucible","addresses":["fd00:1122:3344:10a::c"],"dataset":{"id":"36b9a4bf-7b30-4fe7-903d-3b722c79fa86","name":{"pool_name":"oxp_cb1052e0-4c70-4d37-b979-dd55e6a25f08","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::c]:32345"},"services":[{"id":"36b9a4bf-7b30-4fe7-903d-3b722c79fa86","details":{"type":"crucible","address":"[fd00:1122:3344:10a::c]:32345"}}]},"root":"/pool/ext
/3c805784-f403-4d01-9eb0-4f77d0821980/crypt/zone"},{"zone":{"id":"a109a902-6a27-41b6-a881-c353e28e5389","zone_type":"crucible","addresses":["fd00:1122:3344:10a::8"],"dataset":{"id":"a109a902-6a27-41b6-a881-c353e28e5389","name":{"pool_name":"oxp_d83e36ef-dd7a-4cc2-be19-379b1114c031","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::8]:32345"},"services":[{"id":"a109a902-6a27-41b6-a881-c353e28e5389","details":{"type":"crucible","address":"[fd00:1122:3344:10a::8]:32345"}}]},"root":"/pool/ext/d83e36ef-dd7a-4cc2-be19-379b1114c031/crypt/zone"},{"zone":{"id":"d2a9a0bc-ea12-44e3-ac4a-904c76120d11","zone_type":"crucible","addresses":["fd00:1122:3344:10a::3"],"dataset":{"id":"d2a9a0bc-ea12-44e3-ac4a-904c76120d11","name":{"pool_name":"oxp_c395dcc3-6ece-4b3f-b143-e111a54ef7da","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::3]:32345"},"services":[{"id":"d2a9a0bc-ea12-44e3-ac4a-904c76120d11","details":{"type":"crucible","address":"[fd00:1122:3344:10a::3]:32345"}}]},"root":"/pool/ext/9898a289-2f0d-43a6-b053-850f6e784e9a/crypt/zone"},{"zone":{"id":"b3c3e53b-d9ec-4dd8-bd2c-bd811319aa44","zone_type":"crucible","addresses":["fd00:1122:3344:10a::5"],"dataset":{"id":"b3c3e53b-d9ec-4dd8-bd2c-bd811319aa44","name":{"pool_name":"oxp_9898a289-2f0d-43a6-b053-850f6e784e9a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10a::5]:32345"},"services":[{"id":"b3c3e53b-d9ec-4dd8-bd2c-bd811319aa44","details":{"type":"crucible","address":"[fd00:1122:3344:10a::5]:32345"}}]},"root":"/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone"},{"zone":{"id":"7b445d3b-fd25-4538-ac3f-f439c66d1223","zone_type":"ntp","addresses":["fd00:1122:3344:10a::d"],"dataset":null,"services":[{"id":"7b445d3b-fd25-4538-ac3f-f439c66d1223","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10a::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/f9fe9ce6-be0d-4974-bc30-78a8f1330496/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled20.json b/sled-agent/tests/old-service-ledgers/rack3-sled20.json new file mode 100644 index 0000000000..20c9d60624 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled20.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"4b49e669-264d-4bfb-8ab1-555b520b679c","zone_type":"crucible","addresses":["fd00:1122:3344:108::c"],"dataset":{"id":"4b49e669-264d-4bfb-8ab1-555b520b679c","name":{"pool_name":"oxp_799a1c86-9e1a-4626-91e2-a19f7ff5356e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::c]:32345"},"services":[{"id":"4b49e669-264d-4bfb-8ab1-555b520b679c","details":{"type":"crucible","address":"[fd00:1122:3344:108::c]:32345"}}]},"root":"/pool/ext/d2478613-b7c9-4bd3-856f-1fe8e9c903c2/crypt/zone"},{"zone":{"id":"d802baae-9c3f-437a-85fe-cd72653b6db1","zone_type":"crucible","addresses":["fd00:1122:3344:108::5"],"dataset":{"id":"d802baae-9c3f-437a-85fe-cd72653b6db1","name":{"pool_name":"oxp_d2478613-b7c9-4bd3-856f-1fe8e9c903c2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::5]:32345"},"services":[{"id":"d802baae-9c3f-437a-85fe-cd72653b6db1","details":{"type":"crucible","address":"[fd00:1122:3344:108::5]:32345"}}]},"root":"/pool/ext/116f216c-e151-410f-82bf-8913904cf7b4/crypt/zone"},{"zone":{"id":"e5f69e60-3421-49a4-8c1d-2db8cbb6a5e9","zone_type":"crucible","addresses":["fd00:1122:3344:108::b"],"dataset":{"id":"e5f69e60-3421-49a4-8c1d-2db8cbb6a5e9","name":{"pool_name":"oxp_116f216c-e151-410f-82bf-8913904cf7b4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::b]:32345"},"services":[{"id":"e5f69e60-3421-49a4-8c1d-2db8cbb6a5e9","details":{"type":"crucible","address":"[fd00:1122:3344:108::b]:32345"}}]},"root":"/pool/ext/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone"},{"zone":{"id":"3e598962-ef8c-4cb6-bdfe-ec8563939d6a","zone_type":"crucible","addresses":["fd00:1122:3344:108::4"],"dataset":{"id":"3e598962-ef8c-4cb6-bdfe-ec8563939d6a","name":{"pool_name":"oxp_ababce44-01d1-4c50-b389-f60464c5dde9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::4]:32345"},"services":[{"id":"3e598962-ef8c-4cb6-bdfe-ec8563939d6a","details":{"type":"crucible","address":"[fd00:1122:3344:108::4]:32345"}}]},"root":"/pool/ext/ababce44-01d1-4c50-b389-f60464c5dde9/crypt/zone"},{"zone":{"id":"25355c9f-cc2b-4b24-8eaa-65190f8936a8","zone_type":"crucible","addresses":["fd00:1122:3344:108::d"],"dataset":{"id":"25355c9f-cc2b-4b24-8eaa-65190f8936a8","name":{"pool_name":"oxp_fed46d41-136d-4462-8782-359014efba59","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::d]:32345"},"services":[{"id":"25355c9f-cc2b-4b24-8eaa-65190f8936a8","details":{"type":"crucible","address":"[fd00:1122:3344:108::d]:32345"}}]},"root":"/pool/ext/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone"},{"zone":{"id":"efb2f16c-ebad-4192-b575-dcb4d9b1d5cd","zone_type":"crucible","addresses":["fd00:1122:3344:108::a"],"dataset":{"id":"efb2f16c-ebad-4192-b575-dcb4d9b1d5cd","name":{"pool_name":"oxp_bf509067-0165-456d-98ae-72c86378e626","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::a]:32345"},"services":[{"id":"efb2f16c-ebad-4192-b575-dcb4d9b1d5cd","details":{"type":"crucible","address":"[fd00:1122:3344:108::a]:32345"}}]},"root":"/pool/ext/95220093-e3b8-4f7f-9f5a-cb32cb75180a/crypt/zone"},{"zone":{"id":"89191f0d-4e0b-47fa-9a9e-fbe2a6db1385","zone_type":"crucible","addresses":["fd00:1122:3344:108::8"],"dataset":{"id":"89191f0d-4e0b-47fa-9a9e-fbe2a6db1385","name":{"pool_name":"oxp_eea15142-4635-4e40-b0b4-b0c4f13eca3c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::8]:32345"},"services":[{"id":"89191f0d-4e0b-47fa-9a9e-fbe2a6db1385","details":{"type":"crucible","address":"[fd00:1122:3344:108::8]:32345"}}]},"root":"/pool/ext
/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone"},{"zone":{"id":"e4589324-c528-49c7-9141-35e0a7af6947","zone_type":"crucible","addresses":["fd00:1122:3344:108::6"],"dataset":{"id":"e4589324-c528-49c7-9141-35e0a7af6947","name":{"pool_name":"oxp_95220093-e3b8-4f7f-9f5a-cb32cb75180a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::6]:32345"},"services":[{"id":"e4589324-c528-49c7-9141-35e0a7af6947","details":{"type":"crucible","address":"[fd00:1122:3344:108::6]:32345"}}]},"root":"/pool/ext/ababce44-01d1-4c50-b389-f60464c5dde9/crypt/zone"},{"zone":{"id":"95ebe94d-0e68-421d-9260-c30bd7fe4bd6","zone_type":"nexus","addresses":["fd00:1122:3344:108::3"],"dataset":null,"services":[{"id":"95ebe94d-0e68-421d-9260-c30bd7fe4bd6","details":{"type":"nexus","internal_address":"[fd00:1122:3344:108::3]:12221","external_ip":"45.154.216.35","nic":{"id":"301aa595-f072-4da3-a533-99647b44a66a","kind":{"type":"service","id":"95ebe94d-0e68-421d-9260-c30bd7fe4bd6"},"name":"nexus-95ebe94d-0e68-421d-9260-c30bd7fe4bd6","ip":"172.30.2.5","mac":"A8:40:25:FF:F1:30","subnet":"172.30.2.0/24","vni":100,"primary":true,"slot":0},"external_tls":true,"external_dns_servers":["1.1.1.1","8.8.8.8"]}}]},"root":"/pool/ext/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone"},{"zone":{"id":"4b7a7052-f8e8-4196-8d6b-315943986ce6","zone_type":"crucible","addresses":["fd00:1122:3344:108::7"],"dataset":{"id":"4b7a7052-f8e8-4196-8d6b-315943986ce6","name":{"pool_name":"oxp_a549421c-2f12-45cc-b691-202f0a9bfa8b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::7]:32345"},"services":[{"id":"4b7a7052-f8e8-4196-8d6b-315943986ce6","details":{"type":"crucible","address":"[fd00:1122:3344:108::7]:32345"}}]},"root":"/pool/ext/bf509067-0165-456d-98ae-72c86378e626/crypt/zone"},{"zone":{"id":"71b8ff53-c781-47bb-8ddc-2c7129680542","zone_type":"crucible","addresses":["fd00:1122:3344:108::9"],"dataset":{"id":"71b8ff53-c781-47bb-8ddc-2c7129680542","name":{"pool_name":"oxp_9d19f891-a3d9-4c6e-b1e1-6b0b085a9440","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:108::9]:32345"},"services":[{"id":"71b8ff53-c781-47bb-8ddc-2c7129680542","details":{"type":"crucible","address":"[fd00:1122:3344:108::9]:32345"}}]},"root":"/pool/ext/fed46d41-136d-4462-8782-359014efba59/crypt/zone"},{"zone":{"id":"eaf7bf77-f4c2-4016-9909-4b88a27e9d9a","zone_type":"ntp","addresses":["fd00:1122:3344:108::e"],"dataset":null,"services":[{"id":"eaf7bf77-f4c2-4016-9909-4b88a27e9d9a","details":{"type":"internal_ntp","address":"[fd00:1122:3344:108::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/ababce44-01d1-4c50-b389-f60464c5dde9/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled21.json b/sled-agent/tests/old-service-ledgers/rack3-sled21.json new file mode 100644 index 0000000000..4f69e01c7f --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled21.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"a91e4af3-5d18-4b08-8cb6-0583db8f8842","zone_type":"crucible","addresses":["fd00:1122:3344:117::a"],"dataset":{"id":"a91e4af3-5d18-4b08-8cb6-0583db8f8842","name":{"pool_name":"oxp_4b2896b8-5f0e-42fb-a474-658b28421e65","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::a]:32345"},"services":[{"id":"a91e4af3-5d18-4b08-8cb6-0583db8f8842","details":{"type":"crucible","address":"[fd00:1122:3344:117::a]:32345"}}]},"root":"/pool/ext/23393ed9-acee-4686-861f-7fc825af1249/crypt/zone"},{"zone":{"id":"1ce74512-ce3a-4125-95f1-12c86e0275d5","zone_type":"crucible","addresses":["fd00:1122:3344:117::8"],"dataset":{"id":"1ce74512-ce3a-4125-95f1-12c86e0275d5","name":{"pool_name":"oxp_46ece76f-ef00-4dd0-9f73-326c63959470","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::8]:32345"},"services":[{"id":"1ce74512-ce3a-4125-95f1-12c86e0275d5","details":{"type":"crucible","address":"[fd00:1122:3344:117::8]:32345"}}]},"root":"/pool/ext/1bd5955e-14a9-463f-adeb-f12bcb45a6c1/crypt/zone"},{"zone":{"id":"fef5d35f-9622-4dee-8635-d26e9f7f6869","zone_type":"crucible","addresses":["fd00:1122:3344:117::4"],"dataset":{"id":"fef5d35f-9622-4dee-8635-d26e9f7f6869","name":{"pool_name":"oxp_e4d7c2e8-016b-4617-afb5-38a2d9c1b508","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::4]:32345"},"services":[{"id":"fef5d35f-9622-4dee-8635-d26e9f7f6869","details":{"type":"crucible","address":"[fd00:1122:3344:117::4]:32345"}}]},"root":"/pool/ext/e372bba3-ef60-466f-b819-a3d5b9acbe77/crypt/zone"},{"zone":{"id":"4f024a31-cd38-4219-8381-9f1af70d1d54","zone_type":"crucible","addresses":["fd00:1122:3344:117::c"],"dataset":{"id":"4f024a31-cd38-4219-8381-9f1af70d1d54","name":{"pool_name":"oxp_7cb2a3c2-9d33-4c6a-af57-669f251cf4cf","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::c]:32345"},"services":[{"id":"4f024a31-cd38-4219-8381-9f1af70d1d54","details":{"type":"crucible","address":"[fd00:1122:3344:117::c]:32345"}}]},"root":"/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone"},{"zone":{"id":"d00e1d0b-e12f-420a-a4df-21e4cac176f6","zone_type":"crucible","addresses":["fd00:1122:3344:117::b"],"dataset":{"id":"d00e1d0b-e12f-420a-a4df-21e4cac176f6","name":{"pool_name":"oxp_e372bba3-ef60-466f-b819-a3d5b9acbe77","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::b]:32345"},"services":[{"id":"d00e1d0b-e12f-420a-a4df-21e4cac176f6","details":{"type":"crucible","address":"[fd00:1122:3344:117::b]:32345"}}]},"root":"/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone"},{"zone":{"id":"1598058a-6064-449e-b39c-1e3d345ed793","zone_type":"crucible","addresses":["fd00:1122:3344:117::5"],"dataset":{"id":"1598058a-6064-449e-b39c-1e3d345ed793","name":{"pool_name":"oxp_022a8d67-1e00-49f3-81ed-a0a1bc187cfa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::5]:32345"},"services":[{"id":"1598058a-6064-449e-b39c-1e3d345ed793","details":{"type":"crucible","address":"[fd00:1122:3344:117::5]:32345"}}]},"root":"/pool/ext/022a8d67-1e00-49f3-81ed-a0a1bc187cfa/crypt/zone"},{"zone":{"id":"c723c4b8-3031-4b25-8c16-fe08bc0b5f00","zone_type":"crucible","addresses":["fd00:1122:3344:117::7"],"dataset":{"id":"c723c4b8-3031-4b25-8c16-fe08bc0b5f00","name":{"pool_name":"oxp_23393ed9-acee-4686-861f-7fc825af1249","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::7]:32345"},"services":[{"id":"c723c4b8-3031-4b25-8c16-fe08bc0b5f00","details":{"type":"crucible","address":"[fd00:1122:3344:117::7]:32345"}}]},"root":"/pool/ext
/1bd5955e-14a9-463f-adeb-f12bcb45a6c1/crypt/zone"},{"zone":{"id":"7751b307-888f-46c8-8787-75d2f3fdaef3","zone_type":"crucible","addresses":["fd00:1122:3344:117::9"],"dataset":{"id":"7751b307-888f-46c8-8787-75d2f3fdaef3","name":{"pool_name":"oxp_e54e53d4-f68f-4b19-b8c1-9d5ab42e51c1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::9]:32345"},"services":[{"id":"7751b307-888f-46c8-8787-75d2f3fdaef3","details":{"type":"crucible","address":"[fd00:1122:3344:117::9]:32345"}}]},"root":"/pool/ext/e372bba3-ef60-466f-b819-a3d5b9acbe77/crypt/zone"},{"zone":{"id":"89413ff1-d5de-4931-8389-e84e7ea321af","zone_type":"crucible","addresses":["fd00:1122:3344:117::6"],"dataset":{"id":"89413ff1-d5de-4931-8389-e84e7ea321af","name":{"pool_name":"oxp_1bd5955e-14a9-463f-adeb-f12bcb45a6c1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::6]:32345"},"services":[{"id":"89413ff1-d5de-4931-8389-e84e7ea321af","details":{"type":"crucible","address":"[fd00:1122:3344:117::6]:32345"}}]},"root":"/pool/ext/1bd5955e-14a9-463f-adeb-f12bcb45a6c1/crypt/zone"},{"zone":{"id":"287b0b24-72aa-41b5-a597-8523d84225ef","zone_type":"crucible","addresses":["fd00:1122:3344:117::3"],"dataset":{"id":"287b0b24-72aa-41b5-a597-8523d84225ef","name":{"pool_name":"oxp_cfbd185d-e185-4aaa-a598-9216124ceec4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:117::3]:32345"},"services":[{"id":"287b0b24-72aa-41b5-a597-8523d84225ef","details":{"type":"crucible","address":"[fd00:1122:3344:117::3]:32345"}}]},"root":"/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone"},{"zone":{"id":"4728253e-c534-4a5b-b707-c64ac9a8eb8c","zone_type":"ntp","addresses":["fd00:1122:3344:117::d"],"dataset":null,"services":[{"id":"4728253e-c534-4a5b-b707-c64ac9a8eb8c","details":{"type":"internal_ntp","address":"[fd00:1122:3344:117::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled22.json b/sled-agent/tests/old-service-ledgers/rack3-sled22.json new file mode 100644 index 0000000000..dc98c0390c --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled22.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"49f20cd1-a8a3-4fa8-9209-59da60cd8f9b","zone_type":"crucible","addresses":["fd00:1122:3344:103::5"],"dataset":{"id":"49f20cd1-a8a3-4fa8-9209-59da60cd8f9b","name":{"pool_name":"oxp_13a9ef4a-f33a-4781-8f83-712c07a79b1f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::5]:32345"},"services":[{"id":"49f20cd1-a8a3-4fa8-9209-59da60cd8f9b","details":{"type":"crucible","address":"[fd00:1122:3344:103::5]:32345"}}]},"root":"/pool/ext/711eff4e-736c-478e-83aa-ae86f5efbf1d/crypt/zone"},{"zone":{"id":"896fd564-f94e-496b-9fcf-ddfbfcfac9f7","zone_type":"crucible","addresses":["fd00:1122:3344:103::c"],"dataset":{"id":"896fd564-f94e-496b-9fcf-ddfbfcfac9f7","name":{"pool_name":"oxp_0944c0a2-0fb7-4f51-bced-52cc257cd2f6","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::c]:32345"},"services":[{"id":"896fd564-f94e-496b-9fcf-ddfbfcfac9f7","details":{"type":"crucible","address":"[fd00:1122:3344:103::c]:32345"}}]},"root":"/pool/ext/bc54d8c5-955d-429d-84e0-a20a4e5e27a3/crypt/zone"},{"zone":{"id":"911fb8b3-05c2-4af7-8974-6c74a61d94ad","zone_type":"crucible","addresses":["fd00:1122:3344:103::9"],"dataset":{"id":"911fb8b3-05c2-4af7-8974-6c74a61d94ad","name":{"pool_name":"oxp_29f59fce-a867-4571-9d2e-b03fa5c13510","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::9]:32345"},"services":[{"id":"911fb8b3-05c2-4af7-8974-6c74a61d94ad","details":{"type":"crucible","address":"[fd00:1122:3344:103::9]:32345"}}]},"root":"/pool/ext/711eff4e-736c-478e-83aa-ae86f5efbf1d/crypt/zone"},{"zone":{"id":"682b34db-0b06-4770-a8fe-74437cf184d6","zone_type":"crucible","addresses":["fd00:1122:3344:103::6"],"dataset":{"id":"682b34db-0b06-4770-a8fe-74437cf184d6","name":{"pool_name":"oxp_094d11d2-8049-4138-bcf4-562f5f8e77c0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::6]:32345"},"services":[{"id":"682b34db-0b06-4770-a8fe-74437cf184d6","details":{"type":"crucible","address":"[fd00:1122:3344:103::6]:32345"}}]},"root":"/pool/ext/0944c0a2-0fb7-4f51-bced-52cc257cd2f6/crypt/zone"},{"zone":{"id":"d8d20365-ecd3-4fd5-9495-c0670e3bd5d9","zone_type":"crucible","addresses":["fd00:1122:3344:103::a"],"dataset":{"id":"d8d20365-ecd3-4fd5-9495-c0670e3bd5d9","name":{"pool_name":"oxp_fb97ff7b-0225-400c-a137-3b38a786c0a0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::a]:32345"},"services":[{"id":"d8d20365-ecd3-4fd5-9495-c0670e3bd5d9","details":{"type":"crucible","address":"[fd00:1122:3344:103::a]:32345"}}]},"root":"/pool/ext/094d11d2-8049-4138-bcf4-562f5f8e77c0/crypt/zone"},{"zone":{"id":"673620b6-44d9-4310-8e17-3024ac84e708","zone_type":"crucible","addresses":["fd00:1122:3344:103::7"],"dataset":{"id":"673620b6-44d9-4310-8e17-3024ac84e708","name":{"pool_name":"oxp_711eff4e-736c-478e-83aa-ae86f5efbf1d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::7]:32345"},"services":[{"id":"673620b6-44d9-4310-8e17-3024ac84e708","details":{"type":"crucible","address":"[fd00:1122:3344:103::7]:32345"}}]},"root":"/pool/ext/fb97ff7b-0225-400c-a137-3b38a786c0a0/crypt/zone"},{"zone":{"id":"bf6dfc04-4d4c-41b6-a011-40ffc3bc5080","zone_type":"crucible","addresses":["fd00:1122:3344:103::8"],"dataset":{"id":"bf6dfc04-4d4c-41b6-a011-40ffc3bc5080","name":{"pool_name":"oxp_f815f1b6-48ef-436d-8768-eb08227e2386","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::8]:32345"},"services":[{"id":"bf6dfc04-4d4c-41b6-a011-40ffc3bc5080","details":{"type":"crucible","address":"[fd00:1122:3344:103::8]:32345"}}]},"root":"/pool/ext
/13a9ef4a-f33a-4781-8f83-712c07a79b1f/crypt/zone"},{"zone":{"id":"ac8a82a8-fb6f-4635-a9a9-d98617eab390","zone_type":"crucible","addresses":["fd00:1122:3344:103::3"],"dataset":{"id":"ac8a82a8-fb6f-4635-a9a9-d98617eab390","name":{"pool_name":"oxp_97d6c860-4e2f-496e-974b-2e293fee6af9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::3]:32345"},"services":[{"id":"ac8a82a8-fb6f-4635-a9a9-d98617eab390","details":{"type":"crucible","address":"[fd00:1122:3344:103::3]:32345"}}]},"root":"/pool/ext/0944c0a2-0fb7-4f51-bced-52cc257cd2f6/crypt/zone"},{"zone":{"id":"4ed66558-4815-4b85-9b94-9edf3ee69ead","zone_type":"crucible","addresses":["fd00:1122:3344:103::4"],"dataset":{"id":"4ed66558-4815-4b85-9b94-9edf3ee69ead","name":{"pool_name":"oxp_bc54d8c5-955d-429d-84e0-a20a4e5e27a3","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::4]:32345"},"services":[{"id":"4ed66558-4815-4b85-9b94-9edf3ee69ead","details":{"type":"crucible","address":"[fd00:1122:3344:103::4]:32345"}}]},"root":"/pool/ext/13a9ef4a-f33a-4781-8f83-712c07a79b1f/crypt/zone"},{"zone":{"id":"8a71c6ee-b08d-4c3d-b13c-c9cebc4c328a","zone_type":"crucible","addresses":["fd00:1122:3344:103::b"],"dataset":{"id":"8a71c6ee-b08d-4c3d-b13c-c9cebc4c328a","name":{"pool_name":"oxp_2bdfa429-09bd-4fa1-aa20-eea99f0d2b85","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:103::b]:32345"},"services":[{"id":"8a71c6ee-b08d-4c3d-b13c-c9cebc4c328a","details":{"type":"crucible","address":"[fd00:1122:3344:103::b]:32345"}}]},"root":"/pool/ext/29f59fce-a867-4571-9d2e-b03fa5c13510/crypt/zone"},{"zone":{"id":"7e6b8962-7a1e-4d7b-b7ea-49e64a51d98d","zone_type":"ntp","addresses":["fd00:1122:3344:103::d"],"dataset":null,"services":[{"id":"7e6b8962-7a1e-4d7b-b7ea-49e64a51d98d","details":{"type":"internal_ntp","address":"[fd00:1122:3344:103::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/2bdfa429-09bd-4fa1-aa20-eea99f0d2b85/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled23.json b/sled-agent/tests/old-service-ledgers/rack3-sled23.json new file mode 100644 index 0000000000..ade2144287 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled23.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"6b7e931d-4b91-4dc6-9a7b-4c19ac669e5d","zone_type":"crucible","addresses":["fd00:1122:3344:105::4"],"dataset":{"id":"6b7e931d-4b91-4dc6-9a7b-4c19ac669e5d","name":{"pool_name":"oxp_24dab7f5-164a-47f3-a878-f32ab1e68cce","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::4]:32345"},"services":[{"id":"6b7e931d-4b91-4dc6-9a7b-4c19ac669e5d","details":{"type":"crucible","address":"[fd00:1122:3344:105::4]:32345"}}]},"root":"/pool/ext/ad493851-2d11-4c2d-8d75-989579d9616a/crypt/zone"},{"zone":{"id":"6c58e7aa-71e1-4868-9d4b-e12c7ef40303","zone_type":"crucible","addresses":["fd00:1122:3344:105::a"],"dataset":{"id":"6c58e7aa-71e1-4868-9d4b-e12c7ef40303","name":{"pool_name":"oxp_d664c9e8-bc81-4225-a618-a8ae2d057186","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::a]:32345"},"services":[{"id":"6c58e7aa-71e1-4868-9d4b-e12c7ef40303","details":{"type":"crucible","address":"[fd00:1122:3344:105::a]:32345"}}]},"root":"/pool/ext/ad493851-2d11-4c2d-8d75-989579d9616a/crypt/zone"},{"zone":{"id":"51c6dc8d-b1a4-454a-9b19-01e45eb0b599","zone_type":"crucible","addresses":["fd00:1122:3344:105::d"],"dataset":{"id":"51c6dc8d-b1a4-454a-9b19-01e45eb0b599","name":{"pool_name":"oxp_f5f85537-eb25-4d0e-8e94-b775c41abd73","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::d]:32345"},"services":[{"id":"51c6dc8d-b1a4-454a-9b19-01e45eb0b599","details":{"type":"crucible","address":"[fd00:1122:3344:105::d]:32345"}}]},"root":"/pool/ext/4f1eafe9-b28d-49d3-83e2-ceac8721d6b5/crypt/zone"},{"zone":{"id":"8cbffa61-0bd0-4ad2-bd7d-30fe0dd57469","zone_type":"crucible","addresses":["fd00:1122:3344:105::9"],"dataset":{"id":"8cbffa61-0bd0-4ad2-bd7d-30fe0dd57469","name":{"pool_name":"oxp_88abca38-3f61-4d4b-80a1-4ea3e4827f84","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::9]:32345"},"services":[{"id":"8cbffa61-0bd0-4ad2-bd7d-30fe0dd57469","details":{"type":"crucible","address":"[fd00:1122:3344:105::9]:32345"}}]},"root":"/pool/ext/88abca38-3f61-4d4b-80a1-4ea3e4827f84/crypt/zone"},{"zone":{"id":"2177f37f-2ac9-4e66-bf74-a10bd91f4d33","zone_type":"crucible","addresses":["fd00:1122:3344:105::6"],"dataset":{"id":"2177f37f-2ac9-4e66-bf74-a10bd91f4d33","name":{"pool_name":"oxp_59e20871-4670-40d6-8ff4-aa97899fc991","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::6]:32345"},"services":[{"id":"2177f37f-2ac9-4e66-bf74-a10bd91f4d33","details":{"type":"crucible","address":"[fd00:1122:3344:105::6]:32345"}}]},"root":"/pool/ext/4f1eafe9-b28d-49d3-83e2-ceac8721d6b5/crypt/zone"},{"zone":{"id":"e4e43855-4879-4910-a2ba-40f625c1cc2d","zone_type":"crucible","addresses":["fd00:1122:3344:105::b"],"dataset":{"id":"e4e43855-4879-4910-a2ba-40f625c1cc2d","name":{"pool_name":"oxp_967d2f05-b141-44f5-837d-9b2aa67ee128","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::b]:32345"},"services":[{"id":"e4e43855-4879-4910-a2ba-40f625c1cc2d","details":{"type":"crucible","address":"[fd00:1122:3344:105::b]:32345"}}]},"root":"/pool/ext/6b6f34cd-6d3d-4832-a4e6-3df112c97133/crypt/zone"},{"zone":{"id":"8d2517e1-f9ad-40f2-abb9-2f5122839910","zone_type":"crucible","addresses":["fd00:1122:3344:105::7"],"dataset":{"id":"8d2517e1-f9ad-40f2-abb9-2f5122839910","name":{"pool_name":"oxp_ad493851-2d11-4c2d-8d75-989579d9616a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::7]:32345"},"services":[{"id":"8d2517e1-f9ad-40f2-abb9-2f5122839910","details":{"type":"crucible","address":"[fd00:1122:3344:105::7]:32345"}}]},"root":"/pool/ext
/88abca38-3f61-4d4b-80a1-4ea3e4827f84/crypt/zone"},{"zone":{"id":"44cb3698-a7b1-4388-9165-ac76082ec8bc","zone_type":"crucible","addresses":["fd00:1122:3344:105::5"],"dataset":{"id":"44cb3698-a7b1-4388-9165-ac76082ec8bc","name":{"pool_name":"oxp_4292a83c-8c1f-4b2e-9120-72e0c510bf3c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::5]:32345"},"services":[{"id":"44cb3698-a7b1-4388-9165-ac76082ec8bc","details":{"type":"crucible","address":"[fd00:1122:3344:105::5]:32345"}}]},"root":"/pool/ext/24dab7f5-164a-47f3-a878-f32ab1e68cce/crypt/zone"},{"zone":{"id":"931b5c86-9d72-4518-bfd6-97863152ac65","zone_type":"crucible","addresses":["fd00:1122:3344:105::c"],"dataset":{"id":"931b5c86-9d72-4518-bfd6-97863152ac65","name":{"pool_name":"oxp_6b6f34cd-6d3d-4832-a4e6-3df112c97133","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::c]:32345"},"services":[{"id":"931b5c86-9d72-4518-bfd6-97863152ac65","details":{"type":"crucible","address":"[fd00:1122:3344:105::c]:32345"}}]},"root":"/pool/ext/ad493851-2d11-4c2d-8d75-989579d9616a/crypt/zone"},{"zone":{"id":"ac568073-1889-463e-8cc4-cfed16ce2a34","zone_type":"crucible","addresses":["fd00:1122:3344:105::8"],"dataset":{"id":"ac568073-1889-463e-8cc4-cfed16ce2a34","name":{"pool_name":"oxp_4f1eafe9-b28d-49d3-83e2-ceac8721d6b5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:105::8]:32345"},"services":[{"id":"ac568073-1889-463e-8cc4-cfed16ce2a34","details":{"type":"crucible","address":"[fd00:1122:3344:105::8]:32345"}}]},"root":"/pool/ext/4292a83c-8c1f-4b2e-9120-72e0c510bf3c/crypt/zone"},{"zone":{"id":"e8f86fbb-864e-4d5a-961c-b50b54ae853e","zone_type":"cockroach_db","addresses":["fd00:1122:3344:105::3"],"dataset":{"id":"e8f86fbb-864e-4d5a-961c-b50b54ae853e","name":{"pool_name":"oxp_24dab7f5-164a-47f3-a878-f32ab1e68cce","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:105::3]:32221"},"services":[{"id":"e8f86fbb-864e-4d5a-961c-b50b54ae853e","details":{"type":"cockroach_db","address":"[fd00:1122:3344:105::3]:32221"}}]},"root":"/pool/ext/4f1eafe9-b28d-49d3-83e2-ceac8721d6b5/crypt/zone"},{"zone":{"id":"c79caea0-37b1-49d6-ae6e-8cf849d91374","zone_type":"ntp","addresses":["fd00:1122:3344:105::e"],"dataset":null,"services":[{"id":"c79caea0-37b1-49d6-ae6e-8cf849d91374","details":{"type":"internal_ntp","address":"[fd00:1122:3344:105::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/24dab7f5-164a-47f3-a878-f32ab1e68cce/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled24.json b/sled-agent/tests/old-service-ledgers/rack3-sled24.json new file mode 100644 index 0000000000..e7bd3050d6 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled24.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"d2b1e468-bc3c-4d08-b855-ae3327465375","zone_type":"crucible","addresses":["fd00:1122:3344:106::3"],"dataset":{"id":"d2b1e468-bc3c-4d08-b855-ae3327465375","name":{"pool_name":"oxp_9db196bf-828d-4e55-a2c1-dd9d579d3908","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::3]:32345"},"services":[{"id":"d2b1e468-bc3c-4d08-b855-ae3327465375","details":{"type":"crucible","address":"[fd00:1122:3344:106::3]:32345"}}]},"root":"/pool/ext/74df4c92-edbb-4431-a770-1d015110e66b/crypt/zone"},{"zone":{"id":"61f94a16-79fd-42e3-b225-a4dc67228437","zone_type":"crucible","addresses":["fd00:1122:3344:106::6"],"dataset":{"id":"61f94a16-79fd-42e3-b225-a4dc67228437","name":{"pool_name":"oxp_d77d5b08-5f70-496a-997b-b38804dc3b8a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::6]:32345"},"services":[{"id":"61f94a16-79fd-42e3-b225-a4dc67228437","details":{"type":"crucible","address":"[fd00:1122:3344:106::6]:32345"}}]},"root":"/pool/ext/daf9e3cd-5a40-4eba-a0f6-4f94dab37dae/crypt/zone"},{"zone":{"id":"7d32ef34-dec5-4fd8-899e-20bbc473a3ee","zone_type":"crucible","addresses":["fd00:1122:3344:106::7"],"dataset":{"id":"7d32ef34-dec5-4fd8-899e-20bbc473a3ee","name":{"pool_name":"oxp_50c1b653-6231-41fe-b3cf-b7ba709a0746","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::7]:32345"},"services":[{"id":"7d32ef34-dec5-4fd8-899e-20bbc473a3ee","details":{"type":"crucible","address":"[fd00:1122:3344:106::7]:32345"}}]},"root":"/pool/ext/9db196bf-828d-4e55-a2c1-dd9d579d3908/crypt/zone"},{"zone":{"id":"c34b7ae5-26b9-4651-a3c4-20bba2bd0d2c","zone_type":"crucible","addresses":["fd00:1122:3344:106::5"],"dataset":{"id":"c34b7ae5-26b9-4651-a3c4-20bba2bd0d2c","name":{"pool_name":"oxp_88aea92c-ab92-44c1-9471-eb8e30e075d3","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::5]:32345"},"services":[{"id":"c34b7ae5-26b9-4651-a3c4-20bba2bd0d2c","details":{"type":"crucible","address":"[fd00:1122:3344:106::5]:32345"}}]},"root":"/pool/ext/8da316d4-6b18-4980-a0a8-6e76e72cc40d/crypt/zone"},{"zone":{"id":"36472be8-9a70-4c14-bd02-439b725cec1a","zone_type":"crucible","addresses":["fd00:1122:3344:106::8"],"dataset":{"id":"36472be8-9a70-4c14-bd02-439b725cec1a","name":{"pool_name":"oxp_54544b3a-1513-4db2-911e-7c1eb4b12385","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::8]:32345"},"services":[{"id":"36472be8-9a70-4c14-bd02-439b725cec1a","details":{"type":"crucible","address":"[fd00:1122:3344:106::8]:32345"}}]},"root":"/pool/ext/54544b3a-1513-4db2-911e-7c1eb4b12385/crypt/zone"},{"zone":{"id":"2548f8ab-5255-4334-a1fb-5d7d95213129","zone_type":"crucible","addresses":["fd00:1122:3344:106::9"],"dataset":{"id":"2548f8ab-5255-4334-a1fb-5d7d95213129","name":{"pool_name":"oxp_08050450-967f-431c-9a12-0d051aff020e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::9]:32345"},"services":[{"id":"2548f8ab-5255-4334-a1fb-5d7d95213129","details":{"type":"crucible","address":"[fd00:1122:3344:106::9]:32345"}}]},"root":"/pool/ext/08050450-967f-431c-9a12-0d051aff020e/crypt/zone"},{"zone":{"id":"1455c069-853c-49cd-853a-3ea81b89acd4","zone_type":"crucible","addresses":["fd00:1122:3344:106::c"],"dataset":{"id":"1455c069-853c-49cd-853a-3ea81b89acd4","name":{"pool_name":"oxp_8da316d4-6b18-4980-a0a8-6e76e72cc40d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::c]:32345"},"services":[{"id":"1455c069-853c-49cd-853a-3ea81b89acd4","details":{"type":"crucible","address":"[fd00:1122:3344:106::c]:32345"}}]},"root":"/pool/ext
/08050450-967f-431c-9a12-0d051aff020e/crypt/zone"},{"zone":{"id":"27c0244b-f91a-46c3-bc96-e8eec009371e","zone_type":"crucible","addresses":["fd00:1122:3344:106::b"],"dataset":{"id":"27c0244b-f91a-46c3-bc96-e8eec009371e","name":{"pool_name":"oxp_daf9e3cd-5a40-4eba-a0f6-4f94dab37dae","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::b]:32345"},"services":[{"id":"27c0244b-f91a-46c3-bc96-e8eec009371e","details":{"type":"crucible","address":"[fd00:1122:3344:106::b]:32345"}}]},"root":"/pool/ext/74df4c92-edbb-4431-a770-1d015110e66b/crypt/zone"},{"zone":{"id":"9e46d837-1e0f-42b6-a352-84e6946b8734","zone_type":"crucible","addresses":["fd00:1122:3344:106::4"],"dataset":{"id":"9e46d837-1e0f-42b6-a352-84e6946b8734","name":{"pool_name":"oxp_74df4c92-edbb-4431-a770-1d015110e66b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::4]:32345"},"services":[{"id":"9e46d837-1e0f-42b6-a352-84e6946b8734","details":{"type":"crucible","address":"[fd00:1122:3344:106::4]:32345"}}]},"root":"/pool/ext/15f94c39-d48c-41f6-a913-cc1d04aef1a2/crypt/zone"},{"zone":{"id":"b972fcd4-c1b3-4b3c-9e24-f59c7a7cb192","zone_type":"crucible","addresses":["fd00:1122:3344:106::a"],"dataset":{"id":"b972fcd4-c1b3-4b3c-9e24-f59c7a7cb192","name":{"pool_name":"oxp_15f94c39-d48c-41f6-a913-cc1d04aef1a2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:106::a]:32345"},"services":[{"id":"b972fcd4-c1b3-4b3c-9e24-f59c7a7cb192","details":{"type":"crucible","address":"[fd00:1122:3344:106::a]:32345"}}]},"root":"/pool/ext/74df4c92-edbb-4431-a770-1d015110e66b/crypt/zone"},{"zone":{"id":"e1c8c655-1950-42d5-ae1f-a4ce84854bbc","zone_type":"ntp","addresses":["fd00:1122:3344:106::d"],"dataset":null,"services":[{"id":"e1c8c655-1950-42d5-ae1f-a4ce84854bbc","details":{"type":"internal_ntp","address":"[fd00:1122:3344:106::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/15f94c39-d48c-41f6-a913-cc1d04aef1a2/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled25.json b/sled-agent/tests/old-service-ledgers/rack3-sled25.json new file mode 100644 index 0000000000..642657bbce --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled25.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"10b80058-9b2e-4d6c-8a1a-a61a8258c12f","zone_type":"crucible","addresses":["fd00:1122:3344:118::9"],"dataset":{"id":"10b80058-9b2e-4d6c-8a1a-a61a8258c12f","name":{"pool_name":"oxp_953c19bb-9fff-4488-8a7b-29de9994a948","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::9]:32345"},"services":[{"id":"10b80058-9b2e-4d6c-8a1a-a61a8258c12f","details":{"type":"crucible","address":"[fd00:1122:3344:118::9]:32345"}}]},"root":"/pool/ext/a78caf97-6145-4908-83b5-a03a6d2e0ac4/crypt/zone"},{"zone":{"id":"f58fef96-7b5e-40c2-9482-669088a19209","zone_type":"crucible","addresses":["fd00:1122:3344:118::d"],"dataset":{"id":"f58fef96-7b5e-40c2-9482-669088a19209","name":{"pool_name":"oxp_d7976706-d6ed-4465-8b04-450c96d8feec","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::d]:32345"},"services":[{"id":"f58fef96-7b5e-40c2-9482-669088a19209","details":{"type":"crucible","address":"[fd00:1122:3344:118::d]:32345"}}]},"root":"/pool/ext/d7976706-d6ed-4465-8b04-450c96d8feec/crypt/zone"},{"zone":{"id":"624f1168-47b6-4aa1-84da-e20a0d74d783","zone_type":"crucible","addresses":["fd00:1122:3344:118::b"],"dataset":{"id":"624f1168-47b6-4aa1-84da-e20a0d74d783","name":{"pool_name":"oxp_a78caf97-6145-4908-83b5-a03a6d2e0ac4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::b]:32345"},"services":[{"id":"624f1168-47b6-4aa1-84da-e20a0d74d783","details":{"type":"crucible","address":"[fd00:1122:3344:118::b]:32345"}}]},"root":"/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone"},{"zone":{"id":"8ea85412-19b4-45c1-a53c-027ddd629296","zone_type":"crucible","addresses":["fd00:1122:3344:118::6"],"dataset":{"id":"8ea85412-19b4-45c1-a53c-027ddd629296","name":{"pool_name":"oxp_d5f4c903-155a-4c91-aadd-6039a4f64821","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::6]:32345"},"services":[{"id":"8ea85412-19b4-45c1-a53c-027ddd629296","details":{"type":"crucible","address":"[fd00:1122:3344:118::6]:32345"}}]},"root":"/pool/ext/7d2a7685-c1c9-4d2d-a2bb-df65d96ea3e2/crypt/zone"},{"zone":{"id":"fd226b82-71d7-4719-b32c-a6c7abe28a2a","zone_type":"external_dns","addresses":["fd00:1122:3344:118::3"],"dataset":{"id":"fd226b82-71d7-4719-b32c-a6c7abe28a2a","name":{"pool_name":"oxp_84a80b58-70e9-439c-9558-5b343d9a4b53","kind":{"type":"external_dns"}},"service_address":"[fd00:1122:3344:118::3]:5353"},"services":[{"id":"fd226b82-71d7-4719-b32c-a6c7abe28a2a","details":{"type":"external_dns","http_address":"[fd00:1122:3344:118::3]:5353","dns_address":"45.154.216.34:53","nic":{"id":"7f72b6fd-1120-44dc-b3a7-f727502ba47c","kind":{"type":"service","id":"fd226b82-71d7-4719-b32c-a6c7abe28a2a"},"name":"external-dns-fd226b82-71d7-4719-b32c-a6c7abe28a2a","ip":"172.30.1.6","mac":"A8:40:25:FF:9E:D1","subnet":"172.30.1.0/24","vni":100,"primary":true,"slot":0}}}]},"root":"/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone"},{"zone":{"id":"08d0c38d-f0d9-45b9-856d-b85059fe5f07","zone_type":"crucible","addresses":["fd00:1122:3344:118::4"],"dataset":{"id":"08d0c38d-f0d9-45b9-856d-b85059fe5f07","name":{"pool_name":"oxp_84a80b58-70e9-439c-9558-5b343d9a4b53","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::4]:32345"},"services":[{"id":"08d0c38d-f0d9-45b9-856d-b85059fe5f07","details":{"type":"crucible","address":"[fd00:1122:3344:118::4]:32345"}}]},"root":"/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone"},{"zone":{"id":"5de7d3fd-4a3f-4fdd-b6b2-d1186e16dce5","zone_type":"crucible","addresses":["fd00:1122:3344:118::7"],"dataset":{"id
":"5de7d3fd-4a3f-4fdd-b6b2-d1186e16dce5","name":{"pool_name":"oxp_d76e058f-2d1e-4b15-b3a0-e5509a246876","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::7]:32345"},"services":[{"id":"5de7d3fd-4a3f-4fdd-b6b2-d1186e16dce5","details":{"type":"crucible","address":"[fd00:1122:3344:118::7]:32345"}}]},"root":"/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone"},{"zone":{"id":"5d0f5cad-10b3-497c-903b-eeeabce920e2","zone_type":"crucible","addresses":["fd00:1122:3344:118::8"],"dataset":{"id":"5d0f5cad-10b3-497c-903b-eeeabce920e2","name":{"pool_name":"oxp_3a3ad639-8800-4951-bc2a-201d269e47a2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::8]:32345"},"services":[{"id":"5d0f5cad-10b3-497c-903b-eeeabce920e2","details":{"type":"crucible","address":"[fd00:1122:3344:118::8]:32345"}}]},"root":"/pool/ext/3a3ad639-8800-4951-bc2a-201d269e47a2/crypt/zone"},{"zone":{"id":"39f9cefa-801c-4843-9fb9-05446ffbdd1a","zone_type":"crucible","addresses":["fd00:1122:3344:118::a"],"dataset":{"id":"39f9cefa-801c-4843-9fb9-05446ffbdd1a","name":{"pool_name":"oxp_7d2a7685-c1c9-4d2d-a2bb-df65d96ea3e2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::a]:32345"},"services":[{"id":"39f9cefa-801c-4843-9fb9-05446ffbdd1a","details":{"type":"crucible","address":"[fd00:1122:3344:118::a]:32345"}}]},"root":"/pool/ext/a78caf97-6145-4908-83b5-a03a6d2e0ac4/crypt/zone"},{"zone":{"id":"0711e710-7fdd-4e68-94c8-294b8677e804","zone_type":"crucible","addresses":["fd00:1122:3344:118::5"],"dataset":{"id":"0711e710-7fdd-4e68-94c8-294b8677e804","name":{"pool_name":"oxp_a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::5]:32345"},"services":[{"id":"0711e710-7fdd-4e68-94c8-294b8677e804","details":{"type":"crucible","address":"[fd00:1122:3344:118::5]:32345"}}]},"root":"/pool/ext/3a3ad639-8800-4951-bc2a-201d269e47a2/crypt/zone"},{"zone":{"id":"318a62cc-5c6c-4805-9fb6-c0f6a75ce31c","zone_type":"crucible","addresses":["fd00:1122:3344:118::c"],"dataset":{"id":"318a62cc-5c6c-4805-9fb6-c0f6a75ce31c","name":{"pool_name":"oxp_1d5f0ba3-6b31-4cea-a9a9-2065a538887d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:118::c]:32345"},"services":[{"id":"318a62cc-5c6c-4805-9fb6-c0f6a75ce31c","details":{"type":"crucible","address":"[fd00:1122:3344:118::c]:32345"}}]},"root":"/pool/ext/d7976706-d6ed-4465-8b04-450c96d8feec/crypt/zone"},{"zone":{"id":"463d0498-85b9-40eb-af96-d99af58a587c","zone_type":"ntp","addresses":["fd00:1122:3344:118::e"],"dataset":null,"services":[{"id":"463d0498-85b9-40eb-af96-d99af58a587c","details":{"type":"internal_ntp","address":"[fd00:1122:3344:118::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/d5f4c903-155a-4c91-aadd-6039a4f64821/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled26.json b/sled-agent/tests/old-service-ledgers/rack3-sled26.json new file mode 100644 index 0000000000..0978cb9e45 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled26.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"d8b3de97-cc79-48f6-83ad-02017c21223b","zone_type":"crucible_pantry","addresses":["fd00:1122:3344:119::3"],"dataset":null,"services":[{"id":"d8b3de97-cc79-48f6-83ad-02017c21223b","details":{"type":"crucible_pantry","address":"[fd00:1122:3344:119::3]:17000"}}]},"root":"/pool/ext/e0faea44-8b5c-40b0-bb75-a1aec1a10377/crypt/zone"},{"zone":{"id":"adba1a3b-5bac-44d5-aa5a-879dc6eadb5f","zone_type":"crucible","addresses":["fd00:1122:3344:119::c"],"dataset":{"id":"adba1a3b-5bac-44d5-aa5a-879dc6eadb5f","name":{"pool_name":"oxp_21c339c3-6461-4bdb-8b0e-c0f9f08ee10b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::c]:32345"},"services":[{"id":"adba1a3b-5bac-44d5-aa5a-879dc6eadb5f","details":{"type":"crucible","address":"[fd00:1122:3344:119::c]:32345"}}]},"root":"/pool/ext/f5c73c28-2168-4321-b737-4ca6663155c9/crypt/zone"},{"zone":{"id":"42bb9833-5c39-4aba-b2c4-da2ca1287728","zone_type":"crucible","addresses":["fd00:1122:3344:119::a"],"dataset":{"id":"42bb9833-5c39-4aba-b2c4-da2ca1287728","name":{"pool_name":"oxp_1f91451d-a466-4c9a-a6e6-0abd7985595f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::a]:32345"},"services":[{"id":"42bb9833-5c39-4aba-b2c4-da2ca1287728","details":{"type":"crucible","address":"[fd00:1122:3344:119::a]:32345"}}]},"root":"/pool/ext/21c339c3-6461-4bdb-8b0e-c0f9f08ee10b/crypt/zone"},{"zone":{"id":"197695e1-d949-4982-b679-6e5c9ab4bcc7","zone_type":"crucible","addresses":["fd00:1122:3344:119::b"],"dataset":{"id":"197695e1-d949-4982-b679-6e5c9ab4bcc7","name":{"pool_name":"oxp_e0faea44-8b5c-40b0-bb75-a1aec1a10377","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::b]:32345"},"services":[{"id":"197695e1-d949-4982-b679-6e5c9ab4bcc7","details":{"type":"crucible","address":"[fd00:1122:3344:119::b]:32345"}}]},"root":"/pool/ext/b31e1815-cae0-4145-940c-874fff63bdd5/crypt/zone"},{"zone":{"id":"bf99d4f8-edf1-4de5-98d4-8e6a24965005","zone_type":"crucible","addresses":["fd00:1122:3344:119::8"],"dataset":{"id":"bf99d4f8-edf1-4de5-98d4-8e6a24965005","name":{"pool_name":"oxp_ef2c3afb-6962-4f6b-b567-14766bbd9ec0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::8]:32345"},"services":[{"id":"bf99d4f8-edf1-4de5-98d4-8e6a24965005","details":{"type":"crucible","address":"[fd00:1122:3344:119::8]:32345"}}]},"root":"/pool/ext/21c339c3-6461-4bdb-8b0e-c0f9f08ee10b/crypt/zone"},{"zone":{"id":"390d1853-8be9-4987-b8b6-f022999bf4e7","zone_type":"crucible","addresses":["fd00:1122:3344:119::7"],"dataset":{"id":"390d1853-8be9-4987-b8b6-f022999bf4e7","name":{"pool_name":"oxp_06eed00a-d8d3-4b9d-84c9-23fce535f63e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::7]:32345"},"services":[{"id":"390d1853-8be9-4987-b8b6-f022999bf4e7","details":{"type":"crucible","address":"[fd00:1122:3344:119::7]:32345"}}]},"root":"/pool/ext/ef2c3afb-6962-4f6b-b567-14766bbd9ec0/crypt/zone"},{"zone":{"id":"76fe2161-90df-41b5-9c94-067de9c29db1","zone_type":"crucible","addresses":["fd00:1122:3344:119::4"],"dataset":{"id":"76fe2161-90df-41b5-9c94-067de9c29db1","name":{"pool_name":"oxp_f5c73c28-2168-4321-b737-4ca6663155c9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::4]:32345"},"services":[{"id":"76fe2161-90df-41b5-9c94-067de9c29db1","details":{"type":"crucible","address":"[fd00:1122:3344:119::4]:32345"}}]},"root":"/pool/ext/ef2c3afb-6962-4f6b-b567-14766bbd9ec0/crypt/zone"},{"zone":{"id":"f49dc522-2b13-4055-964c-8315671096aa","zone_type":"crucible","addresses":["fd00:1122:3344:119::d"],"da
taset":{"id":"f49dc522-2b13-4055-964c-8315671096aa","name":{"pool_name":"oxp_662c278b-7f5f-4c7e-91ff-70207e8a307b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::d]:32345"},"services":[{"id":"f49dc522-2b13-4055-964c-8315671096aa","details":{"type":"crucible","address":"[fd00:1122:3344:119::d]:32345"}}]},"root":"/pool/ext/1f91451d-a466-4c9a-a6e6-0abd7985595f/crypt/zone"},{"zone":{"id":"08cc7bd6-368e-4d16-a619-28b17eff35af","zone_type":"crucible","addresses":["fd00:1122:3344:119::9"],"dataset":{"id":"08cc7bd6-368e-4d16-a619-28b17eff35af","name":{"pool_name":"oxp_5516b9ac-b139-40da-aa3b-f094568ba095","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::9]:32345"},"services":[{"id":"08cc7bd6-368e-4d16-a619-28b17eff35af","details":{"type":"crucible","address":"[fd00:1122:3344:119::9]:32345"}}]},"root":"/pool/ext/06eed00a-d8d3-4b9d-84c9-23fce535f63e/crypt/zone"},{"zone":{"id":"74b0613f-bce8-4922-93e0-b5bfccfc8443","zone_type":"crucible","addresses":["fd00:1122:3344:119::5"],"dataset":{"id":"74b0613f-bce8-4922-93e0-b5bfccfc8443","name":{"pool_name":"oxp_b31e1815-cae0-4145-940c-874fff63bdd5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::5]:32345"},"services":[{"id":"74b0613f-bce8-4922-93e0-b5bfccfc8443","details":{"type":"crucible","address":"[fd00:1122:3344:119::5]:32345"}}]},"root":"/pool/ext/21c339c3-6461-4bdb-8b0e-c0f9f08ee10b/crypt/zone"},{"zone":{"id":"55fcfc62-8435-475f-a2aa-29373901b993","zone_type":"crucible","addresses":["fd00:1122:3344:119::6"],"dataset":{"id":"55fcfc62-8435-475f-a2aa-29373901b993","name":{"pool_name":"oxp_eadf6a03-1028-4d48-ac0d-0d27ef2c8c0f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:119::6]:32345"},"services":[{"id":"55fcfc62-8435-475f-a2aa-29373901b993","details":{"type":"crucible","address":"[fd00:1122:3344:119::6]:32345"}}]},"root":"/pool/ext/1f91451d-a466-4c9a-a6e6-0abd7985595f/crypt/zone"},{"zone":{"id":"d52ccea3-6d7f-43a6-a19f-e0409f4e9cdc","zone_type":"ntp","addresses":["fd00:1122:3344:119::e"],"dataset":null,"services":[{"id":"d52ccea3-6d7f-43a6-a19f-e0409f4e9cdc","details":{"type":"internal_ntp","address":"[fd00:1122:3344:119::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/f5c73c28-2168-4321-b737-4ca6663155c9/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled27.json b/sled-agent/tests/old-service-ledgers/rack3-sled27.json new file mode 100644 index 0000000000..0b2db29c4a --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled27.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"095e612f-e218-4a16-aa6e-98c3d69a470a","zone_type":"crucible","addresses":["fd00:1122:3344:10d::a"],"dataset":{"id":"095e612f-e218-4a16-aa6e-98c3d69a470a","name":{"pool_name":"oxp_9f657858-623f-4d78-9841-6e620b5ede30","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::a]:32345"},"services":[{"id":"095e612f-e218-4a16-aa6e-98c3d69a470a","details":{"type":"crucible","address":"[fd00:1122:3344:10d::a]:32345"}}]},"root":"/pool/ext/2d086b51-2b77-4bc7-adc6-43586ea38ce9/crypt/zone"},{"zone":{"id":"de818730-0e3b-4567-94e7-344bd9b6f564","zone_type":"crucible","addresses":["fd00:1122:3344:10d::3"],"dataset":{"id":"de818730-0e3b-4567-94e7-344bd9b6f564","name":{"pool_name":"oxp_ba6ab301-07e1-4d35-80ac-59612f2c2bdb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::3]:32345"},"services":[{"id":"de818730-0e3b-4567-94e7-344bd9b6f564","details":{"type":"crucible","address":"[fd00:1122:3344:10d::3]:32345"}}]},"root":"/pool/ext/7cee2806-e898-47d8-b568-e276a6e271f8/crypt/zone"},{"zone":{"id":"6a21dc3c-3a9d-4520-9a91-7d8f2737bcd4","zone_type":"crucible","addresses":["fd00:1122:3344:10d::4"],"dataset":{"id":"6a21dc3c-3a9d-4520-9a91-7d8f2737bcd4","name":{"pool_name":"oxp_7cee2806-e898-47d8-b568-e276a6e271f8","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::4]:32345"},"services":[{"id":"6a21dc3c-3a9d-4520-9a91-7d8f2737bcd4","details":{"type":"crucible","address":"[fd00:1122:3344:10d::4]:32345"}}]},"root":"/pool/ext/cef23d87-31ed-40d5-99b8-12d7be8e46e7/crypt/zone"},{"zone":{"id":"e01b7f45-b8d7-4944-ba5b-41fb699889a9","zone_type":"crucible","addresses":["fd00:1122:3344:10d::b"],"dataset":{"id":"e01b7f45-b8d7-4944-ba5b-41fb699889a9","name":{"pool_name":"oxp_d9af8878-50bd-4425-95d9-e6556ce92cfa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::b]:32345"},"services":[{"id":"e01b7f45-b8d7-4944-ba5b-41fb699889a9","details":{"type":"crucible","address":"[fd00:1122:3344:10d::b]:32345"}}]},"root":"/pool/ext/6fe9bcaa-88cb-451d-b086-24a3ad53fa22/crypt/zone"},{"zone":{"id":"4271ef62-d319-4e80-b157-915321cec8c7","zone_type":"crucible","addresses":["fd00:1122:3344:10d::c"],"dataset":{"id":"4271ef62-d319-4e80-b157-915321cec8c7","name":{"pool_name":"oxp_ba8ee7dd-cdfb-48bd-92ce-4dc45e070930","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::c]:32345"},"services":[{"id":"4271ef62-d319-4e80-b157-915321cec8c7","details":{"type":"crucible","address":"[fd00:1122:3344:10d::c]:32345"}}]},"root":"/pool/ext/9f657858-623f-4d78-9841-6e620b5ede30/crypt/zone"},{"zone":{"id":"6bdcc159-aeb9-4903-9486-dd8b43a3dc16","zone_type":"crucible","addresses":["fd00:1122:3344:10d::8"],"dataset":{"id":"6bdcc159-aeb9-4903-9486-dd8b43a3dc16","name":{"pool_name":"oxp_5b03a5dc-bb5a-4bf4-bc21-0af849cd1dab","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::8]:32345"},"services":[{"id":"6bdcc159-aeb9-4903-9486-dd8b43a3dc16","details":{"type":"crucible","address":"[fd00:1122:3344:10d::8]:32345"}}]},"root":"/pool/ext/d9af8878-50bd-4425-95d9-e6556ce92cfa/crypt/zone"},{"zone":{"id":"85540e54-cdd7-4baa-920c-5cf54cbc1f83","zone_type":"crucible","addresses":["fd00:1122:3344:10d::7"],"dataset":{"id":"85540e54-cdd7-4baa-920c-5cf54cbc1f83","name":{"pool_name":"oxp_ee24f9a6-84ab-49a5-a28f-e394abfcaa95","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::7]:32345"},"services":[{"id":"85540e54-cdd7-4baa-920c-5cf54cbc1f83","details":{"type":"crucible","address":"[fd00:1122:3344:10d::7]:32345"}}]},"root":"/pool/ext
/9f657858-623f-4d78-9841-6e620b5ede30/crypt/zone"},{"zone":{"id":"750d1a0b-6a14-46c5-9a0b-a504caefb198","zone_type":"crucible","addresses":["fd00:1122:3344:10d::9"],"dataset":{"id":"750d1a0b-6a14-46c5-9a0b-a504caefb198","name":{"pool_name":"oxp_cef23d87-31ed-40d5-99b8-12d7be8e46e7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::9]:32345"},"services":[{"id":"750d1a0b-6a14-46c5-9a0b-a504caefb198","details":{"type":"crucible","address":"[fd00:1122:3344:10d::9]:32345"}}]},"root":"/pool/ext/ba8ee7dd-cdfb-48bd-92ce-4dc45e070930/crypt/zone"},{"zone":{"id":"b5996893-1a9a-434e-a257-d702694f058b","zone_type":"crucible","addresses":["fd00:1122:3344:10d::6"],"dataset":{"id":"b5996893-1a9a-434e-a257-d702694f058b","name":{"pool_name":"oxp_2d086b51-2b77-4bc7-adc6-43586ea38ce9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::6]:32345"},"services":[{"id":"b5996893-1a9a-434e-a257-d702694f058b","details":{"type":"crucible","address":"[fd00:1122:3344:10d::6]:32345"}}]},"root":"/pool/ext/7cee2806-e898-47d8-b568-e276a6e271f8/crypt/zone"},{"zone":{"id":"8b36686a-b98d-451a-9124-a3583000a83a","zone_type":"crucible","addresses":["fd00:1122:3344:10d::5"],"dataset":{"id":"8b36686a-b98d-451a-9124-a3583000a83a","name":{"pool_name":"oxp_6fe9bcaa-88cb-451d-b086-24a3ad53fa22","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10d::5]:32345"},"services":[{"id":"8b36686a-b98d-451a-9124-a3583000a83a","details":{"type":"crucible","address":"[fd00:1122:3344:10d::5]:32345"}}]},"root":"/pool/ext/9f657858-623f-4d78-9841-6e620b5ede30/crypt/zone"},{"zone":{"id":"88d695a2-c8c1-41af-85b0-77424f4d650d","zone_type":"ntp","addresses":["fd00:1122:3344:10d::d"],"dataset":null,"services":[{"id":"88d695a2-c8c1-41af-85b0-77424f4d650d","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10d::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/ba6ab301-07e1-4d35-80ac-59612f2c2bdb/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled28.json b/sled-agent/tests/old-service-ledgers/rack3-sled28.json new file mode 100644 index 0000000000..ec137c18fa --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled28.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"a126365d-f459-43bf-9f99-dbe1c4cdecf8","zone_type":"crucible","addresses":["fd00:1122:3344:113::4"],"dataset":{"id":"a126365d-f459-43bf-9f99-dbe1c4cdecf8","name":{"pool_name":"oxp_c99eabb2-6815-416a-9660-87e2609b357a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::4]:32345"},"services":[{"id":"a126365d-f459-43bf-9f99-dbe1c4cdecf8","details":{"type":"crucible","address":"[fd00:1122:3344:113::4]:32345"}}]},"root":"/pool/ext/6461a450-f043-4d1e-bc03-4a68ed5fe94a/crypt/zone"},{"zone":{"id":"52f57ef8-546a-43bd-a0f3-8c42b99c37a6","zone_type":"crucible","addresses":["fd00:1122:3344:113::3"],"dataset":{"id":"52f57ef8-546a-43bd-a0f3-8c42b99c37a6","name":{"pool_name":"oxp_f6530e9c-6d64-44fa-93d5-ae427916fbf1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::3]:32345"},"services":[{"id":"52f57ef8-546a-43bd-a0f3-8c42b99c37a6","details":{"type":"crucible","address":"[fd00:1122:3344:113::3]:32345"}}]},"root":"/pool/ext/97662260-6b62-450f-9d7e-42f7dee5d568/crypt/zone"},{"zone":{"id":"3ee87855-9423-43ff-800a-fa4fdbf1d956","zone_type":"crucible","addresses":["fd00:1122:3344:113::a"],"dataset":{"id":"3ee87855-9423-43ff-800a-fa4fdbf1d956","name":{"pool_name":"oxp_6461a450-f043-4d1e-bc03-4a68ed5fe94a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::a]:32345"},"services":[{"id":"3ee87855-9423-43ff-800a-fa4fdbf1d956","details":{"type":"crucible","address":"[fd00:1122:3344:113::a]:32345"}}]},"root":"/pool/ext/9515dc86-fe62-4d4f-b38d-b3461cc042fc/crypt/zone"},{"zone":{"id":"55d0ddf9-9b24-4a7a-b97f-248e240f9ba6","zone_type":"crucible","addresses":["fd00:1122:3344:113::5"],"dataset":{"id":"55d0ddf9-9b24-4a7a-b97f-248e240f9ba6","name":{"pool_name":"oxp_97662260-6b62-450f-9d7e-42f7dee5d568","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::5]:32345"},"services":[{"id":"55d0ddf9-9b24-4a7a-b97f-248e240f9ba6","details":{"type":"crucible","address":"[fd00:1122:3344:113::5]:32345"}}]},"root":"/pool/ext/9515dc86-fe62-4d4f-b38d-b3461cc042fc/crypt/zone"},{"zone":{"id":"014cad37-56a7-4b2a-9c9e-505b15b4de85","zone_type":"crucible","addresses":["fd00:1122:3344:113::b"],"dataset":{"id":"014cad37-56a7-4b2a-9c9e-505b15b4de85","name":{"pool_name":"oxp_8529ce8e-21d2-4b23-b9fd-6b90c7ae4f90","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::b]:32345"},"services":[{"id":"014cad37-56a7-4b2a-9c9e-505b15b4de85","details":{"type":"crucible","address":"[fd00:1122:3344:113::b]:32345"}}]},"root":"/pool/ext/6461a450-f043-4d1e-bc03-4a68ed5fe94a/crypt/zone"},{"zone":{"id":"e14fb192-aaab-42ab-aa86-c85f13955940","zone_type":"crucible","addresses":["fd00:1122:3344:113::6"],"dataset":{"id":"e14fb192-aaab-42ab-aa86-c85f13955940","name":{"pool_name":"oxp_5a9455ca-fb01-4549-9a70-7579c031779d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::6]:32345"},"services":[{"id":"e14fb192-aaab-42ab-aa86-c85f13955940","details":{"type":"crucible","address":"[fd00:1122:3344:113::6]:32345"}}]},"root":"/pool/ext/f6530e9c-6d64-44fa-93d5-ae427916fbf1/crypt/zone"},{"zone":{"id":"14540609-9371-442b-8486-88c244e97cd4","zone_type":"crucible","addresses":["fd00:1122:3344:113::8"],"dataset":{"id":"14540609-9371-442b-8486-88c244e97cd4","name":{"pool_name":"oxp_2916d6f3-8775-4887-a6d3-f9723982756f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::8]:32345"},"services":[{"id":"14540609-9371-442b-8486-88c244e97cd4","details":{"type":"crucible","address":"[fd00:1122:3344:113::8]:32345"}}]},"root":"/pool/ext
/8529ce8e-21d2-4b23-b9fd-6b90c7ae4f90/crypt/zone"},{"zone":{"id":"97a6b35f-0af9-41eb-93a1-f8bc5dbba357","zone_type":"crucible","addresses":["fd00:1122:3344:113::7"],"dataset":{"id":"97a6b35f-0af9-41eb-93a1-f8bc5dbba357","name":{"pool_name":"oxp_9515dc86-fe62-4d4f-b38d-b3461cc042fc","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::7]:32345"},"services":[{"id":"97a6b35f-0af9-41eb-93a1-f8bc5dbba357","details":{"type":"crucible","address":"[fd00:1122:3344:113::7]:32345"}}]},"root":"/pool/ext/8529ce8e-21d2-4b23-b9fd-6b90c7ae4f90/crypt/zone"},{"zone":{"id":"5734aa24-cb66-4b0a-9eb2-564646f8d729","zone_type":"crucible","addresses":["fd00:1122:3344:113::9"],"dataset":{"id":"5734aa24-cb66-4b0a-9eb2-564646f8d729","name":{"pool_name":"oxp_9f889a6c-17b1-4edd-9659-458d91439dc1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::9]:32345"},"services":[{"id":"5734aa24-cb66-4b0a-9eb2-564646f8d729","details":{"type":"crucible","address":"[fd00:1122:3344:113::9]:32345"}}]},"root":"/pool/ext/a5074e7f-8d3b-40e0-a79e-dbd9af9d5693/crypt/zone"},{"zone":{"id":"ba86eca1-1427-4540-b4a6-1d9a0e1bc656","zone_type":"crucible","addresses":["fd00:1122:3344:113::c"],"dataset":{"id":"ba86eca1-1427-4540-b4a6-1d9a0e1bc656","name":{"pool_name":"oxp_a5074e7f-8d3b-40e0-a79e-dbd9af9d5693","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:113::c]:32345"},"services":[{"id":"ba86eca1-1427-4540-b4a6-1d9a0e1bc656","details":{"type":"crucible","address":"[fd00:1122:3344:113::c]:32345"}}]},"root":"/pool/ext/2916d6f3-8775-4887-a6d3-f9723982756f/crypt/zone"},{"zone":{"id":"6634dbc4-d22f-40a4-8cd3-4f271d781fa1","zone_type":"ntp","addresses":["fd00:1122:3344:113::d"],"dataset":null,"services":[{"id":"6634dbc4-d22f-40a4-8cd3-4f271d781fa1","details":{"type":"internal_ntp","address":"[fd00:1122:3344:113::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/a5074e7f-8d3b-40e0-a79e-dbd9af9d5693/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled29.json b/sled-agent/tests/old-service-ledgers/rack3-sled29.json new file mode 100644 index 0000000000..2618364e4f --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled29.json @@ -0,0 +1 @@ 
+{"generation":5,"requests":[{"zone":{"id":"1cdd1ebf-9321-4f2d-914c-1e617f60b41a","zone_type":"crucible","addresses":["fd00:1122:3344:120::8"],"dataset":{"id":"1cdd1ebf-9321-4f2d-914c-1e617f60b41a","name":{"pool_name":"oxp_74046573-78a2-46b4-86dc-40bb2ee29dd5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::8]:32345"},"services":[{"id":"1cdd1ebf-9321-4f2d-914c-1e617f60b41a","details":{"type":"crucible","address":"[fd00:1122:3344:120::8]:32345"}}]},"root":"/pool/ext/c1f0a9e4-ea10-4fd9-8b6d-79a2bacfec5e/crypt/zone"},{"zone":{"id":"720a0d08-d1c0-43ba-af86-f2dac1a53639","zone_type":"crucible","addresses":["fd00:1122:3344:120::c"],"dataset":{"id":"720a0d08-d1c0-43ba-af86-f2dac1a53639","name":{"pool_name":"oxp_068d2790-1044-41ed-97a5-b493490b14d1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::c]:32345"},"services":[{"id":"720a0d08-d1c0-43ba-af86-f2dac1a53639","details":{"type":"crucible","address":"[fd00:1122:3344:120::c]:32345"}}]},"root":"/pool/ext/86cd16cf-d00d-40bc-b14a-8220b1e11476/crypt/zone"},{"zone":{"id":"d9f0b97b-2cef-4155-b45f-7db89263e4cf","zone_type":"crucible","addresses":["fd00:1122:3344:120::9"],"dataset":{"id":"d9f0b97b-2cef-4155-b45f-7db89263e4cf","name":{"pool_name":"oxp_8171bf0d-e61e-43f9-87d6-ec8833b80102","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::9]:32345"},"services":[{"id":"d9f0b97b-2cef-4155-b45f-7db89263e4cf","details":{"type":"crucible","address":"[fd00:1122:3344:120::9]:32345"}}]},"root":"/pool/ext/86cd16cf-d00d-40bc-b14a-8220b1e11476/crypt/zone"},{"zone":{"id":"018edff1-0d95-45a3-9a01-39c419bec55a","zone_type":"crucible","addresses":["fd00:1122:3344:120::b"],"dataset":{"id":"018edff1-0d95-45a3-9a01-39c419bec55a","name":{"pool_name":"oxp_0b11e026-f265-49a0-935f-7b234c19c789","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::b]:32345"},"services":[{"id":"018edff1-0d95-45a3-9a01-39c419bec55a","details":{"type":"crucible","address":"[fd00:1122:3344:120::b]:32345"}}]},"root":"/pool/ext/35db8700-d6a7-498c-9d2c-08eb9ab41b7c/crypt/zone"},{"zone":{"id":"f8cc1c1e-a556-436c-836d-42052101c38a","zone_type":"crucible","addresses":["fd00:1122:3344:120::3"],"dataset":{"id":"f8cc1c1e-a556-436c-836d-42052101c38a","name":{"pool_name":"oxp_ed8e5a26-5591-405a-b792-408f5b16e444","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::3]:32345"},"services":[{"id":"f8cc1c1e-a556-436c-836d-42052101c38a","details":{"type":"crucible","address":"[fd00:1122:3344:120::3]:32345"}}]},"root":"/pool/ext/1069bdee-fe5a-4164-a856-ff8ae56c07fb/crypt/zone"},{"zone":{"id":"f9600313-fac0-45a1-a1b5-02dd6af468b9","zone_type":"crucible","addresses":["fd00:1122:3344:120::4"],"dataset":{"id":"f9600313-fac0-45a1-a1b5-02dd6af468b9","name":{"pool_name":"oxp_c1f0a9e4-ea10-4fd9-8b6d-79a2bacfec5e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::4]:32345"},"services":[{"id":"f9600313-fac0-45a1-a1b5-02dd6af468b9","details":{"type":"crucible","address":"[fd00:1122:3344:120::4]:32345"}}]},"root":"/pool/ext/74046573-78a2-46b4-86dc-40bb2ee29dd5/crypt/zone"},{"zone":{"id":"869e4f7c-5312-4b98-bacc-1508f236bf5a","zone_type":"crucible","addresses":["fd00:1122:3344:120::6"],"dataset":{"id":"869e4f7c-5312-4b98-bacc-1508f236bf5a","name":{"pool_name":"oxp_04aea8dc-4316-432f-a13a-d7d9b2efa3f2","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::6]:32345"},"services":[{"id":"869e4f7c-5312-4b98-bacc-1508f236bf5a","details":{"type":"crucible","address":"[fd00:1122:3344:120::6]:32345"}}]},"root":"/pool/ext
/0b11e026-f265-49a0-935f-7b234c19c789/crypt/zone"},{"zone":{"id":"31ed5a0c-7caf-4825-b730-85ee94fe27f1","zone_type":"crucible","addresses":["fd00:1122:3344:120::a"],"dataset":{"id":"31ed5a0c-7caf-4825-b730-85ee94fe27f1","name":{"pool_name":"oxp_86cd16cf-d00d-40bc-b14a-8220b1e11476","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::a]:32345"},"services":[{"id":"31ed5a0c-7caf-4825-b730-85ee94fe27f1","details":{"type":"crucible","address":"[fd00:1122:3344:120::a]:32345"}}]},"root":"/pool/ext/04aea8dc-4316-432f-a13a-d7d9b2efa3f2/crypt/zone"},{"zone":{"id":"7e5a3c39-152a-4270-b01e-9e144cca4aaa","zone_type":"crucible","addresses":["fd00:1122:3344:120::5"],"dataset":{"id":"7e5a3c39-152a-4270-b01e-9e144cca4aaa","name":{"pool_name":"oxp_1069bdee-fe5a-4164-a856-ff8ae56c07fb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::5]:32345"},"services":[{"id":"7e5a3c39-152a-4270-b01e-9e144cca4aaa","details":{"type":"crucible","address":"[fd00:1122:3344:120::5]:32345"}}]},"root":"/pool/ext/04aea8dc-4316-432f-a13a-d7d9b2efa3f2/crypt/zone"},{"zone":{"id":"9a03a386-7304-4a86-bee8-153ef643195e","zone_type":"crucible","addresses":["fd00:1122:3344:120::7"],"dataset":{"id":"9a03a386-7304-4a86-bee8-153ef643195e","name":{"pool_name":"oxp_35db8700-d6a7-498c-9d2c-08eb9ab41b7c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:120::7]:32345"},"services":[{"id":"9a03a386-7304-4a86-bee8-153ef643195e","details":{"type":"crucible","address":"[fd00:1122:3344:120::7]:32345"}}]},"root":"/pool/ext/068d2790-1044-41ed-97a5-b493490b14d1/crypt/zone"},{"zone":{"id":"a800d0a7-1020-481c-8be8-ecfd28b7a2be","zone_type":"ntp","addresses":["fd00:1122:3344:120::d"],"dataset":null,"services":[{"id":"a800d0a7-1020-481c-8be8-ecfd28b7a2be","details":{"type":"internal_ntp","address":"[fd00:1122:3344:120::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/c1f0a9e4-ea10-4fd9-8b6d-79a2bacfec5e/crypt/zone"},{"zone":{"id":"be469efd-8e07-4b8e-bcee-6fd33373cdef","zone_type":"internal_dns","addresses":["fd00:1122:3344:3::1"],"dataset":{"id":"be469efd-8e07-4b8e-bcee-6fd33373cdef","name":{"pool_name":"oxp_ed8e5a26-5591-405a-b792-408f5b16e444","kind":{"type":"internal_dns"}},"service_address":"[fd00:1122:3344:3::1]:5353"},"services":[{"id":"be469efd-8e07-4b8e-bcee-6fd33373cdef","details":{"type":"internal_dns","http_address":"[fd00:1122:3344:3::1]:5353","dns_address":"[fd00:1122:3344:3::1]:53","gz_address":"fd00:1122:3344:3::2","gz_address_index":2}}]},"root":"/pool/ext/068d2790-1044-41ed-97a5-b493490b14d1/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled3.json b/sled-agent/tests/old-service-ledgers/rack3-sled3.json new file mode 100644 index 0000000000..6bcb626cf6 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled3.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"19d091b8-e005-4ff4-97e1-026de95e3667","zone_type":"crucible","addresses":["fd00:1122:3344:10f::c"],"dataset":{"id":"19d091b8-e005-4ff4-97e1-026de95e3667","name":{"pool_name":"oxp_11a63469-4f57-4976-8620-0055bf82dc97","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::c]:32345"},"services":[{"id":"19d091b8-e005-4ff4-97e1-026de95e3667","details":{"type":"crucible","address":"[fd00:1122:3344:10f::c]:32345"}}]},"root":"/pool/ext/6a73a62c-c636-4557-af45-042cb287aee6/crypt/zone"},{"zone":{"id":"57d77171-104e-4977-b2f9-9b529ee7f8a0","zone_type":"crucible","addresses":["fd00:1122:3344:10f::8"],"dataset":{"id":"57d77171-104e-4977-b2f9-9b529ee7f8a0","name":{"pool_name":"oxp_7f3060af-058f-4f52-ab80-902bd13e7ef4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::8]:32345"},"services":[{"id":"57d77171-104e-4977-b2f9-9b529ee7f8a0","details":{"type":"crucible","address":"[fd00:1122:3344:10f::8]:32345"}}]},"root":"/pool/ext/7f3060af-058f-4f52-ab80-902bd13e7ef4/crypt/zone"},{"zone":{"id":"b0371ccf-67da-4562-baf2-eaabe5243e9b","zone_type":"crucible","addresses":["fd00:1122:3344:10f::7"],"dataset":{"id":"b0371ccf-67da-4562-baf2-eaabe5243e9b","name":{"pool_name":"oxp_58ae04cb-26ff-4e30-a20d-9f847bafba4d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::7]:32345"},"services":[{"id":"b0371ccf-67da-4562-baf2-eaabe5243e9b","details":{"type":"crucible","address":"[fd00:1122:3344:10f::7]:32345"}}]},"root":"/pool/ext/125ddcda-f94b-46bc-a10a-94e9acf40265/crypt/zone"},{"zone":{"id":"ae3791ff-2657-4252-bd61-58ec5dc237cd","zone_type":"crucible","addresses":["fd00:1122:3344:10f::9"],"dataset":{"id":"ae3791ff-2657-4252-bd61-58ec5dc237cd","name":{"pool_name":"oxp_125ddcda-f94b-46bc-a10a-94e9acf40265","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::9]:32345"},"services":[{"id":"ae3791ff-2657-4252-bd61-58ec5dc237cd","details":{"type":"crucible","address":"[fd00:1122:3344:10f::9]:32345"}}]},"root":"/pool/ext/58ae04cb-26ff-4e30-a20d-9f847bafba4d/crypt/zone"},{"zone":{"id":"73f865dc-5db7-48c6-9dc4-dff56dd8c045","zone_type":"crucible_pantry","addresses":["fd00:1122:3344:10f::3"],"dataset":null,"services":[{"id":"73f865dc-5db7-48c6-9dc4-dff56dd8c045","details":{"type":"crucible_pantry","address":"[fd00:1122:3344:10f::3]:17000"}}]},"root":"/pool/ext/11a63469-4f57-4976-8620-0055bf82dc97/crypt/zone"},{"zone":{"id":"e5d0170a-0d60-4c51-8f72-4c301979690e","zone_type":"crucible","addresses":["fd00:1122:3344:10f::6"],"dataset":{"id":"e5d0170a-0d60-4c51-8f72-4c301979690e","name":{"pool_name":"oxp_efe4cbab-2a39-4d7d-ae6c-83eb3ab8d4b5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::6]:32345"},"services":[{"id":"e5d0170a-0d60-4c51-8f72-4c301979690e","details":{"type":"crucible","address":"[fd00:1122:3344:10f::6]:32345"}}]},"root":"/pool/ext/6a73a62c-c636-4557-af45-042cb287aee6/crypt/zone"},{"zone":{"id":"ea6894de-c575-43bc-86e9-65b8a58499ff","zone_type":"crucible","addresses":["fd00:1122:3344:10f::a"],"dataset":{"id":"ea6894de-c575-43bc-86e9-65b8a58499ff","name":{"pool_name":"oxp_a87dc882-8b88-4a99-9628-5db79072cffa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::a]:32345"},"services":[{"id":"ea6894de-c575-43bc-86e9-65b8a58499ff","details":{"type":"crucible","address":"[fd00:1122:3344:10f::a]:32345"}}]},"root":"/pool/ext/11a63469-4f57-4976-8620-0055bf82dc97/crypt/zone"},{"zone":{"id":"3081dc99-4fa9-4238-adfa-b9ca381c1f7b","zone_type":"crucible","addresses":["fd00:1122:3344:10f::b"],"da
taset":{"id":"3081dc99-4fa9-4238-adfa-b9ca381c1f7b","name":{"pool_name":"oxp_6a73a62c-c636-4557-af45-042cb287aee6","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::b]:32345"},"services":[{"id":"3081dc99-4fa9-4238-adfa-b9ca381c1f7b","details":{"type":"crucible","address":"[fd00:1122:3344:10f::b]:32345"}}]},"root":"/pool/ext/a87dc882-8b88-4a99-9628-5db79072cffa/crypt/zone"},{"zone":{"id":"b4a3d7c8-487d-4d76-ae4e-a6a51595a5a6","zone_type":"crucible","addresses":["fd00:1122:3344:10f::d"],"dataset":{"id":"b4a3d7c8-487d-4d76-ae4e-a6a51595a5a6","name":{"pool_name":"oxp_a12f87ee-9918-4269-9de4-4bad4fb41caa","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::d]:32345"},"services":[{"id":"b4a3d7c8-487d-4d76-ae4e-a6a51595a5a6","details":{"type":"crucible","address":"[fd00:1122:3344:10f::d]:32345"}}]},"root":"/pool/ext/a12f87ee-9918-4269-9de4-4bad4fb41caa/crypt/zone"},{"zone":{"id":"5ebcee26-f76c-4206-8d81-584ac138d3b9","zone_type":"crucible","addresses":["fd00:1122:3344:10f::4"],"dataset":{"id":"5ebcee26-f76c-4206-8d81-584ac138d3b9","name":{"pool_name":"oxp_27f1917e-fb69-496a-9d40-8ef0d0c0ee55","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::4]:32345"},"services":[{"id":"5ebcee26-f76c-4206-8d81-584ac138d3b9","details":{"type":"crucible","address":"[fd00:1122:3344:10f::4]:32345"}}]},"root":"/pool/ext/58ae04cb-26ff-4e30-a20d-9f847bafba4d/crypt/zone"},{"zone":{"id":"90b2bc57-3a2a-4117-bb6d-7eda7542329a","zone_type":"crucible","addresses":["fd00:1122:3344:10f::5"],"dataset":{"id":"90b2bc57-3a2a-4117-bb6d-7eda7542329a","name":{"pool_name":"oxp_a222e405-40f6-4fdd-9146-94f7d94ed08a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10f::5]:32345"},"services":[{"id":"90b2bc57-3a2a-4117-bb6d-7eda7542329a","details":{"type":"crucible","address":"[fd00:1122:3344:10f::5]:32345"}}]},"root":"/pool/ext/a12f87ee-9918-4269-9de4-4bad4fb41caa/crypt/zone"},{"zone":{"id":"0fb540af-58d3-4abc-bfad-e49765c2b1ee","zone_type":"ntp","addresses":["fd00:1122:3344:10f::e"],"dataset":null,"services":[{"id":"0fb540af-58d3-4abc-bfad-e49765c2b1ee","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10f::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/58ae04cb-26ff-4e30-a20d-9f847bafba4d/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled30.json b/sled-agent/tests/old-service-ledgers/rack3-sled30.json new file mode 100644 index 0000000000..e919de3488 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled30.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"dda0f1c6-84a5-472c-b350-a799c8d3d0eb","zone_type":"crucible","addresses":["fd00:1122:3344:115::8"],"dataset":{"id":"dda0f1c6-84a5-472c-b350-a799c8d3d0eb","name":{"pool_name":"oxp_028b6c9e-5a0e-43d2-a8ed-a5946cf62924","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::8]:32345"},"services":[{"id":"dda0f1c6-84a5-472c-b350-a799c8d3d0eb","details":{"type":"crucible","address":"[fd00:1122:3344:115::8]:32345"}}]},"root":"/pool/ext/b8d84b9c-a65e-4c86-8196-69da5317ae63/crypt/zone"},{"zone":{"id":"157672f9-113f-48b7-9808-dff3c3e67dcd","zone_type":"crucible","addresses":["fd00:1122:3344:115::a"],"dataset":{"id":"157672f9-113f-48b7-9808-dff3c3e67dcd","name":{"pool_name":"oxp_4fdca201-b37e-4072-a1cc-3cb7705954eb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::a]:32345"},"services":[{"id":"157672f9-113f-48b7-9808-dff3c3e67dcd","details":{"type":"crucible","address":"[fd00:1122:3344:115::a]:32345"}}]},"root":"/pool/ext/b8d84b9c-a65e-4c86-8196-69da5317ae63/crypt/zone"},{"zone":{"id":"5a7d4f67-a70f-4d8b-8d35-4dc600991fb5","zone_type":"crucible","addresses":["fd00:1122:3344:115::5"],"dataset":{"id":"5a7d4f67-a70f-4d8b-8d35-4dc600991fb5","name":{"pool_name":"oxp_11a991e5-19a9-48b0-8186-34249ef67957","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::5]:32345"},"services":[{"id":"5a7d4f67-a70f-4d8b-8d35-4dc600991fb5","details":{"type":"crucible","address":"[fd00:1122:3344:115::5]:32345"}}]},"root":"/pool/ext/1e9c9764-aaa4-4681-b110-a937b4c52748/crypt/zone"},{"zone":{"id":"c7036645-b680-4816-834f-8ae1af24c159","zone_type":"crucible","addresses":["fd00:1122:3344:115::b"],"dataset":{"id":"c7036645-b680-4816-834f-8ae1af24c159","name":{"pool_name":"oxp_0780be56-c13d-4c6a-a1ac-37753a0da820","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::b]:32345"},"services":[{"id":"c7036645-b680-4816-834f-8ae1af24c159","details":{"type":"crucible","address":"[fd00:1122:3344:115::b]:32345"}}]},"root":"/pool/ext/80a8d756-ee22-4c88-8b5b-4a46f7eca249/crypt/zone"},{"zone":{"id":"45e47e4b-708f-40b5-a8c8-fbfd73696d45","zone_type":"crucible","addresses":["fd00:1122:3344:115::7"],"dataset":{"id":"45e47e4b-708f-40b5-a8c8-fbfd73696d45","name":{"pool_name":"oxp_80a8d756-ee22-4c88-8b5b-4a46f7eca249","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::7]:32345"},"services":[{"id":"45e47e4b-708f-40b5-a8c8-fbfd73696d45","details":{"type":"crucible","address":"[fd00:1122:3344:115::7]:32345"}}]},"root":"/pool/ext/4fdca201-b37e-4072-a1cc-3cb7705954eb/crypt/zone"},{"zone":{"id":"e805b0c1-3f80-49da-8dc1-caaf843e5003","zone_type":"crucible","addresses":["fd00:1122:3344:115::c"],"dataset":{"id":"e805b0c1-3f80-49da-8dc1-caaf843e5003","name":{"pool_name":"oxp_d54e1ed7-e589-4413-a487-6e9a257104e7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::c]:32345"},"services":[{"id":"e805b0c1-3f80-49da-8dc1-caaf843e5003","details":{"type":"crucible","address":"[fd00:1122:3344:115::c]:32345"}}]},"root":"/pool/ext/d54e1ed7-e589-4413-a487-6e9a257104e7/crypt/zone"},{"zone":{"id":"e47d3f81-3df6-4c35-bec6-41277bc74c07","zone_type":"crucible","addresses":["fd00:1122:3344:115::4"],"dataset":{"id":"e47d3f81-3df6-4c35-bec6-41277bc74c07","name":{"pool_name":"oxp_b8d84b9c-a65e-4c86-8196-69da5317ae63","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::4]:32345"},"services":[{"id":"e47d3f81-3df6-4c35-bec6-41277bc74c07","details":{"type":"crucible","address":"[fd00:1122:3344:115::4]:32345"}}]},"root":"/pool/ext
/772b3aaa-3501-4dc7-9b3d-048b8b1f7970/crypt/zone"},{"zone":{"id":"2a796a69-b061-44c7-b2df-35bc611f10f5","zone_type":"crucible","addresses":["fd00:1122:3344:115::6"],"dataset":{"id":"2a796a69-b061-44c7-b2df-35bc611f10f5","name":{"pool_name":"oxp_73abe9e0-d38e-48fc-bdec-b094bfa5670d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::6]:32345"},"services":[{"id":"2a796a69-b061-44c7-b2df-35bc611f10f5","details":{"type":"crucible","address":"[fd00:1122:3344:115::6]:32345"}}]},"root":"/pool/ext/028b6c9e-5a0e-43d2-a8ed-a5946cf62924/crypt/zone"},{"zone":{"id":"4e1d2af1-8ef4-4762-aa80-b08da08b45bb","zone_type":"crucible","addresses":["fd00:1122:3344:115::3"],"dataset":{"id":"4e1d2af1-8ef4-4762-aa80-b08da08b45bb","name":{"pool_name":"oxp_772b3aaa-3501-4dc7-9b3d-048b8b1f7970","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::3]:32345"},"services":[{"id":"4e1d2af1-8ef4-4762-aa80-b08da08b45bb","details":{"type":"crucible","address":"[fd00:1122:3344:115::3]:32345"}}]},"root":"/pool/ext/d54e1ed7-e589-4413-a487-6e9a257104e7/crypt/zone"},{"zone":{"id":"fb1b10d5-b7cb-416d-98fc-b5d3bc02d495","zone_type":"crucible","addresses":["fd00:1122:3344:115::9"],"dataset":{"id":"fb1b10d5-b7cb-416d-98fc-b5d3bc02d495","name":{"pool_name":"oxp_1e9c9764-aaa4-4681-b110-a937b4c52748","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:115::9]:32345"},"services":[{"id":"fb1b10d5-b7cb-416d-98fc-b5d3bc02d495","details":{"type":"crucible","address":"[fd00:1122:3344:115::9]:32345"}}]},"root":"/pool/ext/b8d84b9c-a65e-4c86-8196-69da5317ae63/crypt/zone"},{"zone":{"id":"5155463c-8a09-45a5-ad1b-817f2e93b284","zone_type":"ntp","addresses":["fd00:1122:3344:115::d"],"dataset":null,"services":[{"id":"5155463c-8a09-45a5-ad1b-817f2e93b284","details":{"type":"internal_ntp","address":"[fd00:1122:3344:115::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/772b3aaa-3501-4dc7-9b3d-048b8b1f7970/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled31.json b/sled-agent/tests/old-service-ledgers/rack3-sled31.json new file mode 100644 index 0000000000..d984227227 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled31.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"a0eae689-8e6b-4297-bb3d-8b7ffc5c4a07","zone_type":"crucible","addresses":["fd00:1122:3344:102::c"],"dataset":{"id":"a0eae689-8e6b-4297-bb3d-8b7ffc5c4a07","name":{"pool_name":"oxp_274cb567-fd74-4e00-b9c7-6ca367b3fda4","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::c]:32345"},"services":[{"id":"a0eae689-8e6b-4297-bb3d-8b7ffc5c4a07","details":{"type":"crucible","address":"[fd00:1122:3344:102::c]:32345"}}]},"root":"/pool/ext/1443b190-de16-42b0-b881-e87e875dd507/crypt/zone"},{"zone":{"id":"9cea406d-451e-4328-9052-b58487f799a5","zone_type":"crucible","addresses":["fd00:1122:3344:102::b"],"dataset":{"id":"9cea406d-451e-4328-9052-b58487f799a5","name":{"pool_name":"oxp_89c7f72e-632c-462b-a515-01cd80683711","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::b]:32345"},"services":[{"id":"9cea406d-451e-4328-9052-b58487f799a5","details":{"type":"crucible","address":"[fd00:1122:3344:102::b]:32345"}}]},"root":"/pool/ext/274cb567-fd74-4e00-b9c7-6ca367b3fda4/crypt/zone"},{"zone":{"id":"9c7dad7e-7f60-4bf4-8efc-0883a17e7cf6","zone_type":"crucible","addresses":["fd00:1122:3344:102::6"],"dataset":{"id":"9c7dad7e-7f60-4bf4-8efc-0883a17e7cf6","name":{"pool_name":"oxp_2c8e5637-b989-4b8f-82ac-ff2e9102b560","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::6]:32345"},"services":[{"id":"9c7dad7e-7f60-4bf4-8efc-0883a17e7cf6","details":{"type":"crucible","address":"[fd00:1122:3344:102::6]:32345"}}]},"root":"/pool/ext/1443b190-de16-42b0-b881-e87e875dd507/crypt/zone"},{"zone":{"id":"73015cba-79c6-4a67-97d8-fa0819cbf750","zone_type":"crucible","addresses":["fd00:1122:3344:102::a"],"dataset":{"id":"73015cba-79c6-4a67-97d8-fa0819cbf750","name":{"pool_name":"oxp_fa62108e-f7bb-4f6d-86f3-8094a1ea8352","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::a]:32345"},"services":[{"id":"73015cba-79c6-4a67-97d8-fa0819cbf750","details":{"type":"crucible","address":"[fd00:1122:3344:102::a]:32345"}}]},"root":"/pool/ext/2c8e5637-b989-4b8f-82ac-ff2e9102b560/crypt/zone"},{"zone":{"id":"f9ca3097-072e-4e7f-9f50-eb7c7ae39b6f","zone_type":"crucible","addresses":["fd00:1122:3344:102::5"],"dataset":{"id":"f9ca3097-072e-4e7f-9f50-eb7c7ae39b6f","name":{"pool_name":"oxp_42c6602c-2ccf-48ce-8344-693c832fd693","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::5]:32345"},"services":[{"id":"f9ca3097-072e-4e7f-9f50-eb7c7ae39b6f","details":{"type":"crucible","address":"[fd00:1122:3344:102::5]:32345"}}]},"root":"/pool/ext/2c8e5637-b989-4b8f-82ac-ff2e9102b560/crypt/zone"},{"zone":{"id":"e7855e05-a125-4a80-ac2c-8a2db96e1bf8","zone_type":"crucible","addresses":["fd00:1122:3344:102::7"],"dataset":{"id":"e7855e05-a125-4a80-ac2c-8a2db96e1bf8","name":{"pool_name":"oxp_1f72afd3-d2aa-46a8-b81a-54dbcc2f6317","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::7]:32345"},"services":[{"id":"e7855e05-a125-4a80-ac2c-8a2db96e1bf8","details":{"type":"crucible","address":"[fd00:1122:3344:102::7]:32345"}}]},"root":"/pool/ext/42c6602c-2ccf-48ce-8344-693c832fd693/crypt/zone"},{"zone":{"id":"e5de9bc9-e996-4fea-8318-ad7a8a6be4a3","zone_type":"crucible","addresses":["fd00:1122:3344:102::4"],"dataset":{"id":"e5de9bc9-e996-4fea-8318-ad7a8a6be4a3","name":{"pool_name":"oxp_1443b190-de16-42b0-b881-e87e875dd507","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::4]:32345"},"services":[{"id":"e5de9bc9-e996-4fea-8318-ad7a8a6be4a3","details":{"type":"crucible","address":"[fd00:1122:3344:102::4]:32345"}}]},"root":"/pool/ext
/89c7f72e-632c-462b-a515-01cd80683711/crypt/zone"},{"zone":{"id":"cd0d0aac-44ff-4566-9260-a64ae6cecef4","zone_type":"crucible","addresses":["fd00:1122:3344:102::8"],"dataset":{"id":"cd0d0aac-44ff-4566-9260-a64ae6cecef4","name":{"pool_name":"oxp_92c0d1f6-cb4d-4ddb-b5ba-979fb3491812","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::8]:32345"},"services":[{"id":"cd0d0aac-44ff-4566-9260-a64ae6cecef4","details":{"type":"crucible","address":"[fd00:1122:3344:102::8]:32345"}}]},"root":"/pool/ext/89c7f72e-632c-462b-a515-01cd80683711/crypt/zone"},{"zone":{"id":"a8230592-0e7a-46c8-a653-7587a27f05bf","zone_type":"crucible","addresses":["fd00:1122:3344:102::9"],"dataset":{"id":"a8230592-0e7a-46c8-a653-7587a27f05bf","name":{"pool_name":"oxp_1b7873de-99fd-454f-b576-bff695524133","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::9]:32345"},"services":[{"id":"a8230592-0e7a-46c8-a653-7587a27f05bf","details":{"type":"crucible","address":"[fd00:1122:3344:102::9]:32345"}}]},"root":"/pool/ext/92c0d1f6-cb4d-4ddb-b5ba-979fb3491812/crypt/zone"},{"zone":{"id":"c19ffbb1-4dc1-4825-a3cf-080e9b543b16","zone_type":"crucible","addresses":["fd00:1122:3344:102::d"],"dataset":{"id":"c19ffbb1-4dc1-4825-a3cf-080e9b543b16","name":{"pool_name":"oxp_67823df7-511c-4984-b98c-7a8f5c40c22d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:102::d]:32345"},"services":[{"id":"c19ffbb1-4dc1-4825-a3cf-080e9b543b16","details":{"type":"crucible","address":"[fd00:1122:3344:102::d]:32345"}}]},"root":"/pool/ext/1443b190-de16-42b0-b881-e87e875dd507/crypt/zone"},{"zone":{"id":"ff30fe7c-51f3-43b9-a788-d8f94a7bb028","zone_type":"cockroach_db","addresses":["fd00:1122:3344:102::3"],"dataset":{"id":"ff30fe7c-51f3-43b9-a788-d8f94a7bb028","name":{"pool_name":"oxp_1443b190-de16-42b0-b881-e87e875dd507","kind":{"type":"cockroach_db"}},"service_address":"[fd00:1122:3344:102::3]:32221"},"services":[{"id":"ff30fe7c-51f3-43b9-a788-d8f94a7bb028","details":{"type":"cockroach_db","address":"[fd00:1122:3344:102::3]:32221"}}]},"root":"/pool/ext/fa62108e-f7bb-4f6d-86f3-8094a1ea8352/crypt/zone"},{"zone":{"id":"16b50c55-8117-4efd-aabf-0273677b89d5","zone_type":"ntp","addresses":["fd00:1122:3344:102::e"],"dataset":null,"services":[{"id":"16b50c55-8117-4efd-aabf-0273677b89d5","details":{"type":"internal_ntp","address":"[fd00:1122:3344:102::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/fa62108e-f7bb-4f6d-86f3-8094a1ea8352/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled4.json b/sled-agent/tests/old-service-ledgers/rack3-sled4.json new file mode 100644 index 0000000000..e9e5ce5569 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled4.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"22452953-ee80-4659-a555-8e027bf205b0","zone_type":"crucible","addresses":["fd00:1122:3344:10c::4"],"dataset":{"id":"22452953-ee80-4659-a555-8e027bf205b0","name":{"pool_name":"oxp_92ba1667-a6f7-4913-9b00-14825384c7bf","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::4]:32345"},"services":[{"id":"22452953-ee80-4659-a555-8e027bf205b0","details":{"type":"crucible","address":"[fd00:1122:3344:10c::4]:32345"}}]},"root":"/pool/ext/ab62b941-5f84-42c7-929d-295b20efffe7/crypt/zone"},{"zone":{"id":"9a5a2fcf-44a0-4468-979a-a71686cef627","zone_type":"crucible","addresses":["fd00:1122:3344:10c::3"],"dataset":{"id":"9a5a2fcf-44a0-4468-979a-a71686cef627","name":{"pool_name":"oxp_dbfdc981-1b81-4d7d-9449-9530890b199a","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::3]:32345"},"services":[{"id":"9a5a2fcf-44a0-4468-979a-a71686cef627","details":{"type":"crucible","address":"[fd00:1122:3344:10c::3]:32345"}}]},"root":"/pool/ext/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone"},{"zone":{"id":"a014f12e-2636-4258-af76-e01d9b8d1c1f","zone_type":"crucible","addresses":["fd00:1122:3344:10c::b"],"dataset":{"id":"a014f12e-2636-4258-af76-e01d9b8d1c1f","name":{"pool_name":"oxp_ab62b941-5f84-42c7-929d-295b20efffe7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::b]:32345"},"services":[{"id":"a014f12e-2636-4258-af76-e01d9b8d1c1f","details":{"type":"crucible","address":"[fd00:1122:3344:10c::b]:32345"}}]},"root":"/pool/ext/a624a843-1c4e-41c3-a1d2-4be7a6c57e9b/crypt/zone"},{"zone":{"id":"431768b8-26ba-4ab4-b616-9e183bb79b8b","zone_type":"crucible","addresses":["fd00:1122:3344:10c::7"],"dataset":{"id":"431768b8-26ba-4ab4-b616-9e183bb79b8b","name":{"pool_name":"oxp_7c121177-3210-4457-9b42-3657add6e166","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::7]:32345"},"services":[{"id":"431768b8-26ba-4ab4-b616-9e183bb79b8b","details":{"type":"crucible","address":"[fd00:1122:3344:10c::7]:32345"}}]},"root":"/pool/ext/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone"},{"zone":{"id":"22992c56-bd5a-4d0f-86c5-d6f8e87b7bbb","zone_type":"crucible","addresses":["fd00:1122:3344:10c::9"],"dataset":{"id":"22992c56-bd5a-4d0f-86c5-d6f8e87b7bbb","name":{"pool_name":"oxp_842bdd28-196e-4b18-83db-68bd81176a44","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::9]:32345"},"services":[{"id":"22992c56-bd5a-4d0f-86c5-d6f8e87b7bbb","details":{"type":"crucible","address":"[fd00:1122:3344:10c::9]:32345"}}]},"root":"/pool/ext/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone"},{"zone":{"id":"de376149-aa45-4660-9ae6-15e8ba4a4233","zone_type":"crucible","addresses":["fd00:1122:3344:10c::5"],"dataset":{"id":"de376149-aa45-4660-9ae6-15e8ba4a4233","name":{"pool_name":"oxp_25856a84-6707-4b94-81d1-b43d5bc990d7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::5]:32345"},"services":[{"id":"de376149-aa45-4660-9ae6-15e8ba4a4233","details":{"type":"crucible","address":"[fd00:1122:3344:10c::5]:32345"}}]},"root":"/pool/ext/7c121177-3210-4457-9b42-3657add6e166/crypt/zone"},{"zone":{"id":"ceeba69d-8c0a-47df-a37b-7f1b90f23016","zone_type":"crucible","addresses":["fd00:1122:3344:10c::a"],"dataset":{"id":"ceeba69d-8c0a-47df-a37b-7f1b90f23016","name":{"pool_name":"oxp_a624a843-1c4e-41c3-a1d2-4be7a6c57e9b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::a]:32345"},"services":[{"id":"ceeba69d-8c0a-47df-a37b-7f1b90f23016","details":{"type":"crucible","address":"[fd00:1122:3344:10c::a]:32345"}}]},"root":"/pool/ext
/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone"},{"zone":{"id":"65293ce4-2e63-4336-9207-3c61f58667f9","zone_type":"crucible","addresses":["fd00:1122:3344:10c::c"],"dataset":{"id":"65293ce4-2e63-4336-9207-3c61f58667f9","name":{"pool_name":"oxp_74ac4da9-cdae-4c08-8431-11211184aa09","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::c]:32345"},"services":[{"id":"65293ce4-2e63-4336-9207-3c61f58667f9","details":{"type":"crucible","address":"[fd00:1122:3344:10c::c]:32345"}}]},"root":"/pool/ext/842bdd28-196e-4b18-83db-68bd81176a44/crypt/zone"},{"zone":{"id":"e8f55a5d-65f9-436c-bc25-1d1a7070e876","zone_type":"crucible","addresses":["fd00:1122:3344:10c::6"],"dataset":{"id":"e8f55a5d-65f9-436c-bc25-1d1a7070e876","name":{"pool_name":"oxp_9bfe385c-16dd-4209-bc0b-f28ae75d58e3","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::6]:32345"},"services":[{"id":"e8f55a5d-65f9-436c-bc25-1d1a7070e876","details":{"type":"crucible","address":"[fd00:1122:3344:10c::6]:32345"}}]},"root":"/pool/ext/92ba1667-a6f7-4913-9b00-14825384c7bf/crypt/zone"},{"zone":{"id":"2dfbd4c6-afbf-4c8c-bf40-764f02727852","zone_type":"crucible","addresses":["fd00:1122:3344:10c::8"],"dataset":{"id":"2dfbd4c6-afbf-4c8c-bf40-764f02727852","name":{"pool_name":"oxp_55eb093d-6b6f-418c-9767-09afe4c51fff","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10c::8]:32345"},"services":[{"id":"2dfbd4c6-afbf-4c8c-bf40-764f02727852","details":{"type":"crucible","address":"[fd00:1122:3344:10c::8]:32345"}}]},"root":"/pool/ext/dbfdc981-1b81-4d7d-9449-9530890b199a/crypt/zone"},{"zone":{"id":"8c73baf7-1a58-4e2c-b4d1-966c89a18d03","zone_type":"ntp","addresses":["fd00:1122:3344:10c::d"],"dataset":null,"services":[{"id":"8c73baf7-1a58-4e2c-b4d1-966c89a18d03","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10c::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/842bdd28-196e-4b18-83db-68bd81176a44/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled5.json b/sled-agent/tests/old-service-ledgers/rack3-sled5.json new file mode 100644 index 0000000000..ea7b5ec40a --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled5.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"2f488e7b-fd93-48a6-8b2b-61f6e8336268","zone_type":"crucible","addresses":["fd00:1122:3344:101::b"],"dataset":{"id":"2f488e7b-fd93-48a6-8b2b-61f6e8336268","name":{"pool_name":"oxp_5840a3b7-f765-45d3-8a41-7f543f936bee","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::b]:32345"},"services":[{"id":"2f488e7b-fd93-48a6-8b2b-61f6e8336268","details":{"type":"crucible","address":"[fd00:1122:3344:101::b]:32345"}}]},"root":"/pool/ext/dd084b76-1130-4ad3-9196-6b02be607fe9/crypt/zone"},{"zone":{"id":"1ed5fd3f-933a-4921-a91f-5c286823f8d4","zone_type":"crucible","addresses":["fd00:1122:3344:101::a"],"dataset":{"id":"1ed5fd3f-933a-4921-a91f-5c286823f8d4","name":{"pool_name":"oxp_c1e807e7-b64a-4dbd-b845-ffed0b9a54f1","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::a]:32345"},"services":[{"id":"1ed5fd3f-933a-4921-a91f-5c286823f8d4","details":{"type":"crucible","address":"[fd00:1122:3344:101::a]:32345"}}]},"root":"/pool/ext/be06ea9c-df86-4fec-b5dd-8809710893af/crypt/zone"},{"zone":{"id":"0f8f1013-465d-4b49-b55d-f0b9bf6f789a","zone_type":"crucible","addresses":["fd00:1122:3344:101::6"],"dataset":{"id":"0f8f1013-465d-4b49-b55d-f0b9bf6f789a","name":{"pool_name":"oxp_4dfa7003-0305-47f5-b23d-88a228c1e12e","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::6]:32345"},"services":[{"id":"0f8f1013-465d-4b49-b55d-f0b9bf6f789a","details":{"type":"crucible","address":"[fd00:1122:3344:101::6]:32345"}}]},"root":"/pool/ext/be06ea9c-df86-4fec-b5dd-8809710893af/crypt/zone"},{"zone":{"id":"2e4ef017-6c62-40bc-bab5-f2e01addad22","zone_type":"crucible","addresses":["fd00:1122:3344:101::7"],"dataset":{"id":"2e4ef017-6c62-40bc-bab5-f2e01addad22","name":{"pool_name":"oxp_d94e9c58-e6d1-444b-b7d8-19ac17dea042","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::7]:32345"},"services":[{"id":"2e4ef017-6c62-40bc-bab5-f2e01addad22","details":{"type":"crucible","address":"[fd00:1122:3344:101::7]:32345"}}]},"root":"/pool/ext/c1e807e7-b64a-4dbd-b845-ffed0b9a54f1/crypt/zone"},{"zone":{"id":"6a0baf13-a80b-4778-a0ab-a69cd851de2d","zone_type":"crucible","addresses":["fd00:1122:3344:101::9"],"dataset":{"id":"6a0baf13-a80b-4778-a0ab-a69cd851de2d","name":{"pool_name":"oxp_be06ea9c-df86-4fec-b5dd-8809710893af","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::9]:32345"},"services":[{"id":"6a0baf13-a80b-4778-a0ab-a69cd851de2d","details":{"type":"crucible","address":"[fd00:1122:3344:101::9]:32345"}}]},"root":"/pool/ext/a9d419d4-5915-4a40-baa3-3512785de034/crypt/zone"},{"zone":{"id":"391ec257-fd47-4cc8-9bfa-49a0747a9a67","zone_type":"crucible","addresses":["fd00:1122:3344:101::8"],"dataset":{"id":"391ec257-fd47-4cc8-9bfa-49a0747a9a67","name":{"pool_name":"oxp_a9d419d4-5915-4a40-baa3-3512785de034","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::8]:32345"},"services":[{"id":"391ec257-fd47-4cc8-9bfa-49a0747a9a67","details":{"type":"crucible","address":"[fd00:1122:3344:101::8]:32345"}}]},"root":"/pool/ext/709d5d04-5dff-4558-8b5d-fbc2a7d83036/crypt/zone"},{"zone":{"id":"fd8e615a-f170-4da9-b8d0-2a5a123d8682","zone_type":"crucible_pantry","addresses":["fd00:1122:3344:101::3"],"dataset":null,"services":[{"id":"fd8e615a-f170-4da9-b8d0-2a5a123d8682","details":{"type":"crucible_pantry","address":"[fd00:1122:3344:101::3]:17000"}}]},"root":"/pool/ext/dd084b76-1130-4ad3-9196-6b02be607fe9/crypt/zone"},{"zone":{"id":"f8a793f4-cd08-49ec-8fee-6bcd37092fdc","zone_type":"crucible","addresses":["fd00:1122:3344:101::c"],"da
taset":{"id":"f8a793f4-cd08-49ec-8fee-6bcd37092fdc","name":{"pool_name":"oxp_709d5d04-5dff-4558-8b5d-fbc2a7d83036","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::c]:32345"},"services":[{"id":"f8a793f4-cd08-49ec-8fee-6bcd37092fdc","details":{"type":"crucible","address":"[fd00:1122:3344:101::c]:32345"}}]},"root":"/pool/ext/d94e9c58-e6d1-444b-b7d8-19ac17dea042/crypt/zone"},{"zone":{"id":"c67d44be-d6b8-4a08-a7e0-3ab300749ad6","zone_type":"crucible","addresses":["fd00:1122:3344:101::4"],"dataset":{"id":"c67d44be-d6b8-4a08-a7e0-3ab300749ad6","name":{"pool_name":"oxp_231cd696-2839-4a9a-ae42-6d875a98a797","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::4]:32345"},"services":[{"id":"c67d44be-d6b8-4a08-a7e0-3ab300749ad6","details":{"type":"crucible","address":"[fd00:1122:3344:101::4]:32345"}}]},"root":"/pool/ext/709d5d04-5dff-4558-8b5d-fbc2a7d83036/crypt/zone"},{"zone":{"id":"e91b4957-8165-451d-9fa5-090c3a39f199","zone_type":"crucible","addresses":["fd00:1122:3344:101::d"],"dataset":{"id":"e91b4957-8165-451d-9fa5-090c3a39f199","name":{"pool_name":"oxp_dd084b76-1130-4ad3-9196-6b02be607fe9","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::d]:32345"},"services":[{"id":"e91b4957-8165-451d-9fa5-090c3a39f199","details":{"type":"crucible","address":"[fd00:1122:3344:101::d]:32345"}}]},"root":"/pool/ext/5840a3b7-f765-45d3-8a41-7f543f936bee/crypt/zone"},{"zone":{"id":"5e737b6e-d33d-4a2c-b8c0-3cad9d05a68f","zone_type":"crucible","addresses":["fd00:1122:3344:101::5"],"dataset":{"id":"5e737b6e-d33d-4a2c-b8c0-3cad9d05a68f","name":{"pool_name":"oxp_8fa4f837-c6f3-4c65-88d4-21eb3cd7ffee","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:101::5]:32345"},"services":[{"id":"5e737b6e-d33d-4a2c-b8c0-3cad9d05a68f","details":{"type":"crucible","address":"[fd00:1122:3344:101::5]:32345"}}]},"root":"/pool/ext/dd084b76-1130-4ad3-9196-6b02be607fe9/crypt/zone"},{"zone":{"id":"7e6b7816-b1a6-40f3-894a-a5d5c0571dbb","zone_type":"ntp","addresses":["fd00:1122:3344:101::e"],"dataset":null,"services":[{"id":"7e6b7816-b1a6-40f3-894a-a5d5c0571dbb","details":{"type":"internal_ntp","address":"[fd00:1122:3344:101::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/be06ea9c-df86-4fec-b5dd-8809710893af/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled6.json b/sled-agent/tests/old-service-ledgers/rack3-sled6.json new file mode 100644 index 0000000000..2c499813cd --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled6.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"eafffae7-69fd-49e1-9541-7cf237ab12b3","zone_type":"crucible","addresses":["fd00:1122:3344:110::3"],"dataset":{"id":"eafffae7-69fd-49e1-9541-7cf237ab12b3","name":{"pool_name":"oxp_929404cd-2522-4440-b21c-91d466a9a7e0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::3]:32345"},"services":[{"id":"eafffae7-69fd-49e1-9541-7cf237ab12b3","details":{"type":"crucible","address":"[fd00:1122:3344:110::3]:32345"}}]},"root":"/pool/ext/aff390ed-8d70-49fa-9000-5420b54ab118/crypt/zone"},{"zone":{"id":"f4bccf15-d69f-402d-9bd2-7959a4cb2823","zone_type":"crucible","addresses":["fd00:1122:3344:110::9"],"dataset":{"id":"f4bccf15-d69f-402d-9bd2-7959a4cb2823","name":{"pool_name":"oxp_f80f96be-a3d7-490a-96a7-faf7da80a579","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::9]:32345"},"services":[{"id":"f4bccf15-d69f-402d-9bd2-7959a4cb2823","details":{"type":"crucible","address":"[fd00:1122:3344:110::9]:32345"}}]},"root":"/pool/ext/6bcd54c8-d4a8-429d-8f17-cf02615eb063/crypt/zone"},{"zone":{"id":"82e51c9d-c187-4baa-8307-e46eeafc5ff2","zone_type":"crucible","addresses":["fd00:1122:3344:110::5"],"dataset":{"id":"82e51c9d-c187-4baa-8307-e46eeafc5ff2","name":{"pool_name":"oxp_37d86199-6834-49d9-888a-88ff6f281b29","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::5]:32345"},"services":[{"id":"82e51c9d-c187-4baa-8307-e46eeafc5ff2","details":{"type":"crucible","address":"[fd00:1122:3344:110::5]:32345"}}]},"root":"/pool/ext/d2e27e2a-2deb-42ae-84a7-c2d06f3aeb4f/crypt/zone"},{"zone":{"id":"cf667caf-304c-40c4-acce-f0eb05d011ef","zone_type":"crucible","addresses":["fd00:1122:3344:110::8"],"dataset":{"id":"cf667caf-304c-40c4-acce-f0eb05d011ef","name":{"pool_name":"oxp_625c0110-644e-4d63-8321-b85ab5642260","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::8]:32345"},"services":[{"id":"cf667caf-304c-40c4-acce-f0eb05d011ef","details":{"type":"crucible","address":"[fd00:1122:3344:110::8]:32345"}}]},"root":"/pool/ext/d2e27e2a-2deb-42ae-84a7-c2d06f3aeb4f/crypt/zone"},{"zone":{"id":"14e60912-108e-4dd3-984e-2332a183b346","zone_type":"crucible","addresses":["fd00:1122:3344:110::b"],"dataset":{"id":"14e60912-108e-4dd3-984e-2332a183b346","name":{"pool_name":"oxp_fa6470f5-0a4c-4fef-b0b1-57c8749c6cca","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::b]:32345"},"services":[{"id":"14e60912-108e-4dd3-984e-2332a183b346","details":{"type":"crucible","address":"[fd00:1122:3344:110::b]:32345"}}]},"root":"/pool/ext/6c5ab641-3bd4-4d8c-96f4-4f56c1045142/crypt/zone"},{"zone":{"id":"1aacf923-c96f-4bab-acb0-63f28e86eef6","zone_type":"crucible","addresses":["fd00:1122:3344:110::c"],"dataset":{"id":"1aacf923-c96f-4bab-acb0-63f28e86eef6","name":{"pool_name":"oxp_21b0f3ed-d27f-4996-968b-bf2b494d9308","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::c]:32345"},"services":[{"id":"1aacf923-c96f-4bab-acb0-63f28e86eef6","details":{"type":"crucible","address":"[fd00:1122:3344:110::c]:32345"}}]},"root":"/pool/ext/625c0110-644e-4d63-8321-b85ab5642260/crypt/zone"},{"zone":{"id":"b9db0845-04d3-4dc1-84ba-224749562a6c","zone_type":"crucible","addresses":["fd00:1122:3344:110::6"],"dataset":{"id":"b9db0845-04d3-4dc1-84ba-224749562a6c","name":{"pool_name":"oxp_d2e27e2a-2deb-42ae-84a7-c2d06f3aeb4f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::6]:32345"},"services":[{"id":"b9db0845-04d3-4dc1-84ba-224749562a6c","details":{"type":"crucible","address":"[fd00:1122:3344:110::6]:32345"}}]},"root":"/pool/ext
/aff390ed-8d70-49fa-9000-5420b54ab118/crypt/zone"},{"zone":{"id":"38b51865-ee80-4e1b-a40b-3452951f9022","zone_type":"crucible","addresses":["fd00:1122:3344:110::7"],"dataset":{"id":"38b51865-ee80-4e1b-a40b-3452951f9022","name":{"pool_name":"oxp_6bcd54c8-d4a8-429d-8f17-cf02615eb063","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::7]:32345"},"services":[{"id":"38b51865-ee80-4e1b-a40b-3452951f9022","details":{"type":"crucible","address":"[fd00:1122:3344:110::7]:32345"}}]},"root":"/pool/ext/37d86199-6834-49d9-888a-88ff6f281b29/crypt/zone"},{"zone":{"id":"4bc441f6-f7e5-4d68-8751-53ef1e251c47","zone_type":"crucible","addresses":["fd00:1122:3344:110::a"],"dataset":{"id":"4bc441f6-f7e5-4d68-8751-53ef1e251c47","name":{"pool_name":"oxp_6c5ab641-3bd4-4d8c-96f4-4f56c1045142","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::a]:32345"},"services":[{"id":"4bc441f6-f7e5-4d68-8751-53ef1e251c47","details":{"type":"crucible","address":"[fd00:1122:3344:110::a]:32345"}}]},"root":"/pool/ext/21b0f3ed-d27f-4996-968b-bf2b494d9308/crypt/zone"},{"zone":{"id":"d2c20cf8-ed4c-4815-add9-45996364f721","zone_type":"crucible","addresses":["fd00:1122:3344:110::4"],"dataset":{"id":"d2c20cf8-ed4c-4815-add9-45996364f721","name":{"pool_name":"oxp_aff390ed-8d70-49fa-9000-5420b54ab118","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:110::4]:32345"},"services":[{"id":"d2c20cf8-ed4c-4815-add9-45996364f721","details":{"type":"crucible","address":"[fd00:1122:3344:110::4]:32345"}}]},"root":"/pool/ext/6c5ab641-3bd4-4d8c-96f4-4f56c1045142/crypt/zone"},{"zone":{"id":"1bb548cb-889a-411e-8c67-d1b785225180","zone_type":"ntp","addresses":["fd00:1122:3344:110::d"],"dataset":null,"services":[{"id":"1bb548cb-889a-411e-8c67-d1b785225180","details":{"type":"internal_ntp","address":"[fd00:1122:3344:110::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/6bcd54c8-d4a8-429d-8f17-cf02615eb063/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled7.json b/sled-agent/tests/old-service-ledgers/rack3-sled7.json new file mode 100644 index 0000000000..fb701a2bdb --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled7.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"2eb74fa3-71ec-484c-8ffa-3daeab0e4c78","zone_type":"crucible","addresses":["fd00:1122:3344:11d::3"],"dataset":{"id":"2eb74fa3-71ec-484c-8ffa-3daeab0e4c78","name":{"pool_name":"oxp_c6b63fea-e3e2-4806-b8dc-bdfe7b5c3d89","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::3]:32345"},"services":[{"id":"2eb74fa3-71ec-484c-8ffa-3daeab0e4c78","details":{"type":"crucible","address":"[fd00:1122:3344:11d::3]:32345"}}]},"root":"/pool/ext/9f20cbae-7a63-4c31-9386-2ac3cbe12030/crypt/zone"},{"zone":{"id":"9f92bfcf-7435-44a6-8e77-0597f93cd0b4","zone_type":"crucible","addresses":["fd00:1122:3344:11d::7"],"dataset":{"id":"9f92bfcf-7435-44a6-8e77-0597f93cd0b4","name":{"pool_name":"oxp_9fa336f1-2b69-4ebf-9553-e3bab7e3e6ef","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::7]:32345"},"services":[{"id":"9f92bfcf-7435-44a6-8e77-0597f93cd0b4","details":{"type":"crucible","address":"[fd00:1122:3344:11d::7]:32345"}}]},"root":"/pool/ext/e05a6264-63f2-4961-bc14-57b4f65614c0/crypt/zone"},{"zone":{"id":"1bf9aed4-9fd3-4d87-b8e7-7f066d25ec1d","zone_type":"crucible","addresses":["fd00:1122:3344:11d::b"],"dataset":{"id":"1bf9aed4-9fd3-4d87-b8e7-7f066d25ec1d","name":{"pool_name":"oxp_a5a52f47-9c9a-4519-83dc-abc56619495d","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::b]:32345"},"services":[{"id":"1bf9aed4-9fd3-4d87-b8e7-7f066d25ec1d","details":{"type":"crucible","address":"[fd00:1122:3344:11d::b]:32345"}}]},"root":"/pool/ext/cbcad26e-5e52-41b7-9875-1a84d30d8a15/crypt/zone"},{"zone":{"id":"2a722aa7-cd8a-445d-83fe-57fc9b9a8249","zone_type":"crucible","addresses":["fd00:1122:3344:11d::8"],"dataset":{"id":"2a722aa7-cd8a-445d-83fe-57fc9b9a8249","name":{"pool_name":"oxp_1f4b71eb-505f-4706-912c-b13dd3f2eafb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::8]:32345"},"services":[{"id":"2a722aa7-cd8a-445d-83fe-57fc9b9a8249","details":{"type":"crucible","address":"[fd00:1122:3344:11d::8]:32345"}}]},"root":"/pool/ext/a5a52f47-9c9a-4519-83dc-abc56619495d/crypt/zone"},{"zone":{"id":"76af5b23-d833-435c-b848-2a09d9fad9a1","zone_type":"crucible","addresses":["fd00:1122:3344:11d::c"],"dataset":{"id":"76af5b23-d833-435c-b848-2a09d9fad9a1","name":{"pool_name":"oxp_cbcad26e-5e52-41b7-9875-1a84d30d8a15","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::c]:32345"},"services":[{"id":"76af5b23-d833-435c-b848-2a09d9fad9a1","details":{"type":"crucible","address":"[fd00:1122:3344:11d::c]:32345"}}]},"root":"/pool/ext/9f20cbae-7a63-4c31-9386-2ac3cbe12030/crypt/zone"},{"zone":{"id":"3a412bf4-a385-4e66-9ada-a87f6536d6ca","zone_type":"crucible","addresses":["fd00:1122:3344:11d::4"],"dataset":{"id":"3a412bf4-a385-4e66-9ada-a87f6536d6ca","name":{"pool_name":"oxp_e05a6264-63f2-4961-bc14-57b4f65614c0","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::4]:32345"},"services":[{"id":"3a412bf4-a385-4e66-9ada-a87f6536d6ca","details":{"type":"crucible","address":"[fd00:1122:3344:11d::4]:32345"}}]},"root":"/pool/ext/e05a6264-63f2-4961-bc14-57b4f65614c0/crypt/zone"},{"zone":{"id":"99a25fa7-8231-4a46-a6ec-ffc5281db1f8","zone_type":"crucible","addresses":["fd00:1122:3344:11d::5"],"dataset":{"id":"99a25fa7-8231-4a46-a6ec-ffc5281db1f8","name":{"pool_name":"oxp_722494ab-9a2b-481b-ac11-292fded682a5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::5]:32345"},"services":[{"id":"99a25fa7-8231-4a46-a6ec-ffc5281db1f8","details":{"type":"crucible","address":"[fd00:1122:3344:11d::5]:32345"}}]},"root":"/pool/ext
/e05a6264-63f2-4961-bc14-57b4f65614c0/crypt/zone"},{"zone":{"id":"06c7ddc8-9b3e-48ef-9874-0c40874e9877","zone_type":"crucible","addresses":["fd00:1122:3344:11d::a"],"dataset":{"id":"06c7ddc8-9b3e-48ef-9874-0c40874e9877","name":{"pool_name":"oxp_8c3972d1-5b17-4479-88cc-1c33e4344160","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::a]:32345"},"services":[{"id":"06c7ddc8-9b3e-48ef-9874-0c40874e9877","details":{"type":"crucible","address":"[fd00:1122:3344:11d::a]:32345"}}]},"root":"/pool/ext/8c3972d1-5b17-4479-88cc-1c33e4344160/crypt/zone"},{"zone":{"id":"1212b2dc-157d-4bd3-94af-fb5db1d91f24","zone_type":"crucible","addresses":["fd00:1122:3344:11d::9"],"dataset":{"id":"1212b2dc-157d-4bd3-94af-fb5db1d91f24","name":{"pool_name":"oxp_9f20cbae-7a63-4c31-9386-2ac3cbe12030","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::9]:32345"},"services":[{"id":"1212b2dc-157d-4bd3-94af-fb5db1d91f24","details":{"type":"crucible","address":"[fd00:1122:3344:11d::9]:32345"}}]},"root":"/pool/ext/977aa6c3-2026-4178-9948-e09f78008575/crypt/zone"},{"zone":{"id":"b1fb5f2e-b20d-4f4c-9f6f-bbeb1a98dd50","zone_type":"crucible","addresses":["fd00:1122:3344:11d::6"],"dataset":{"id":"b1fb5f2e-b20d-4f4c-9f6f-bbeb1a98dd50","name":{"pool_name":"oxp_977aa6c3-2026-4178-9948-e09f78008575","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:11d::6]:32345"},"services":[{"id":"b1fb5f2e-b20d-4f4c-9f6f-bbeb1a98dd50","details":{"type":"crucible","address":"[fd00:1122:3344:11d::6]:32345"}}]},"root":"/pool/ext/722494ab-9a2b-481b-ac11-292fded682a5/crypt/zone"},{"zone":{"id":"e68dde0f-0647-46db-ae1c-711835c13e25","zone_type":"ntp","addresses":["fd00:1122:3344:11d::d"],"dataset":null,"services":[{"id":"e68dde0f-0647-46db-ae1c-711835c13e25","details":{"type":"internal_ntp","address":"[fd00:1122:3344:11d::d]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/1f4b71eb-505f-4706-912c-b13dd3f2eafb/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled8.json b/sled-agent/tests/old-service-ledgers/rack3-sled8.json new file mode 100644 index 0000000000..cf96f8ae81 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled8.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"85c18b7c-a100-458c-b18d-ecfdacaefac4","zone_type":"crucible","addresses":["fd00:1122:3344:10e::5"],"dataset":{"id":"85c18b7c-a100-458c-b18d-ecfdacaefac4","name":{"pool_name":"oxp_07b266bc-86c3-4a76-9522-8b34ba1ae78c","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::5]:32345"},"services":[{"id":"85c18b7c-a100-458c-b18d-ecfdacaefac4","details":{"type":"crucible","address":"[fd00:1122:3344:10e::5]:32345"}}]},"root":"/pool/ext/5b88e44e-f886-4de8-8a6b-48ea5ed9d70b/crypt/zone"},{"zone":{"id":"db303465-7879-4d86-8da8-a0c7162e5184","zone_type":"crucible","addresses":["fd00:1122:3344:10e::4"],"dataset":{"id":"db303465-7879-4d86-8da8-a0c7162e5184","name":{"pool_name":"oxp_e9488a32-880d-44a2-8948-db0b7e3a35b5","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::4]:32345"},"services":[{"id":"db303465-7879-4d86-8da8-a0c7162e5184","details":{"type":"crucible","address":"[fd00:1122:3344:10e::4]:32345"}}]},"root":"/pool/ext/8d798756-7200-4db4-9faf-f41b75106a63/crypt/zone"},{"zone":{"id":"c44ce6be-512d-4104-9260-a5b8fe373937","zone_type":"crucible","addresses":["fd00:1122:3344:10e::9"],"dataset":{"id":"c44ce6be-512d-4104-9260-a5b8fe373937","name":{"pool_name":"oxp_025dfc06-5aeb-407f-adc8-ba18dc9bba35","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::9]:32345"},"services":[{"id":"c44ce6be-512d-4104-9260-a5b8fe373937","details":{"type":"crucible","address":"[fd00:1122:3344:10e::9]:32345"}}]},"root":"/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone"},{"zone":{"id":"1cfdb5b6-e568-436a-a85f-7fecf1b8eef2","zone_type":"nexus","addresses":["fd00:1122:3344:10e::3"],"dataset":null,"services":[{"id":"1cfdb5b6-e568-436a-a85f-7fecf1b8eef2","details":{"type":"nexus","internal_address":"[fd00:1122:3344:10e::3]:12221","external_ip":"45.154.216.36","nic":{"id":"569754a2-a5e0-4aa8-90a7-2fa65f43b667","kind":{"type":"service","id":"1cfdb5b6-e568-436a-a85f-7fecf1b8eef2"},"name":"nexus-1cfdb5b6-e568-436a-a85f-7fecf1b8eef2","ip":"172.30.2.6","mac":"A8:40:25:FF:EC:6B","subnet":"172.30.2.0/24","vni":100,"primary":true,"slot":0},"external_tls":true,"external_dns_servers":["1.1.1.1","8.8.8.8"]}}]},"root":"/pool/ext/025dfc06-5aeb-407f-adc8-ba18dc9bba35/crypt/zone"},{"zone":{"id":"44a68792-ca14-442e-b7a9-11970d50ba0e","zone_type":"crucible","addresses":["fd00:1122:3344:10e::a"],"dataset":{"id":"44a68792-ca14-442e-b7a9-11970d50ba0e","name":{"pool_name":"oxp_2a492098-7df3-4409-9466-561edb7aa99b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::a]:32345"},"services":[{"id":"44a68792-ca14-442e-b7a9-11970d50ba0e","details":{"type":"crucible","address":"[fd00:1122:3344:10e::a]:32345"}}]},"root":"/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone"},{"zone":{"id":"514cf0ca-6d23-434e-9785-446b83b2f029","zone_type":"crucible","addresses":["fd00:1122:3344:10e::7"],"dataset":{"id":"514cf0ca-6d23-434e-9785-446b83b2f029","name":{"pool_name":"oxp_5b88e44e-f886-4de8-8a6b-48ea5ed9d70b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::7]:32345"},"services":[{"id":"514cf0ca-6d23-434e-9785-446b83b2f029","details":{"type":"crucible","address":"[fd00:1122:3344:10e::7]:32345"}}]},"root":"/pool/ext/5b88e44e-f886-4de8-8a6b-48ea5ed9d70b/crypt/zone"},{"zone":{"id":"bc6d8347-8f64-4031-912c-932349df07fe","zone_type":"crucible","addresses":["fd00:1122:3344:10e::6"],"dataset":{"id":"bc6d8347-8f64-4031-912c-932349df07fe","name":{"pool_name":"oxp_1544ce68-3544-4cba-b3b6-1927d08b78a5","kind":{"type":"crucible"}},"servic
e_address":"[fd00:1122:3344:10e::6]:32345"},"services":[{"id":"bc6d8347-8f64-4031-912c-932349df07fe","details":{"type":"crucible","address":"[fd00:1122:3344:10e::6]:32345"}}]},"root":"/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone"},{"zone":{"id":"1ab0a4f5-99ad-4341-8c89-7fd03e5ccb08","zone_type":"crucible","addresses":["fd00:1122:3344:10e::b"],"dataset":{"id":"1ab0a4f5-99ad-4341-8c89-7fd03e5ccb08","name":{"pool_name":"oxp_033eb462-968f-42ce-9c29-377bd40a3014","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::b]:32345"},"services":[{"id":"1ab0a4f5-99ad-4341-8c89-7fd03e5ccb08","details":{"type":"crucible","address":"[fd00:1122:3344:10e::b]:32345"}}]},"root":"/pool/ext/9e1a0803-7453-4eac-91c9-d7891ecd634f/crypt/zone"},{"zone":{"id":"d6f2520b-3d04-44d9-bd46-6ffccfcb46d2","zone_type":"crucible","addresses":["fd00:1122:3344:10e::8"],"dataset":{"id":"d6f2520b-3d04-44d9-bd46-6ffccfcb46d2","name":{"pool_name":"oxp_36e8d29c-1e88-4c2b-8f59-f312201067c3","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::8]:32345"},"services":[{"id":"d6f2520b-3d04-44d9-bd46-6ffccfcb46d2","details":{"type":"crucible","address":"[fd00:1122:3344:10e::8]:32345"}}]},"root":"/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone"},{"zone":{"id":"d6da9d13-bfcf-469d-a99e-faeb5e30be32","zone_type":"crucible","addresses":["fd00:1122:3344:10e::c"],"dataset":{"id":"d6da9d13-bfcf-469d-a99e-faeb5e30be32","name":{"pool_name":"oxp_9e1a0803-7453-4eac-91c9-d7891ecd634f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::c]:32345"},"services":[{"id":"d6da9d13-bfcf-469d-a99e-faeb5e30be32","details":{"type":"crucible","address":"[fd00:1122:3344:10e::c]:32345"}}]},"root":"/pool/ext/8d798756-7200-4db4-9faf-f41b75106a63/crypt/zone"},{"zone":{"id":"a1dc59c2-5883-4fb8-83be-ac2d95d255d1","zone_type":"crucible","addresses":["fd00:1122:3344:10e::d"],"dataset":{"id":"a1dc59c2-5883-4fb8-83be-ac2d95d255d1","name":{"pool_name":"oxp_8d798756-7200-4db4-9faf-f41b75106a63","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10e::d]:32345"},"services":[{"id":"a1dc59c2-5883-4fb8-83be-ac2d95d255d1","details":{"type":"crucible","address":"[fd00:1122:3344:10e::d]:32345"}}]},"root":"/pool/ext/36e8d29c-1e88-4c2b-8f59-f312201067c3/crypt/zone"},{"zone":{"id":"48f25dba-7392-44ce-9bb0-28489ebc44bc","zone_type":"ntp","addresses":["fd00:1122:3344:10e::e"],"dataset":null,"services":[{"id":"48f25dba-7392-44ce-9bb0-28489ebc44bc","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10e::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/5b88e44e-f886-4de8-8a6b-48ea5ed9d70b/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/old-service-ledgers/rack3-sled9.json b/sled-agent/tests/old-service-ledgers/rack3-sled9.json new file mode 100644 index 0000000000..c225f50081 --- /dev/null +++ b/sled-agent/tests/old-service-ledgers/rack3-sled9.json @@ -0,0 +1 @@ 
+{"generation":4,"requests":[{"zone":{"id":"b452e5e1-ab4c-4994-9679-ef21b3b4fee9","zone_type":"crucible","addresses":["fd00:1122:3344:10b::6"],"dataset":{"id":"b452e5e1-ab4c-4994-9679-ef21b3b4fee9","name":{"pool_name":"oxp_d63a297d-ae6a-4072-9dca-dda404044989","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::6]:32345"},"services":[{"id":"b452e5e1-ab4c-4994-9679-ef21b3b4fee9","details":{"type":"crucible","address":"[fd00:1122:3344:10b::6]:32345"}}]},"root":"/pool/ext/7c204111-31df-4c32-9a3e-780411f700fd/crypt/zone"},{"zone":{"id":"e9826cdc-6d3a-4eff-b1b5-ec4364ebe6b9","zone_type":"oximeter","addresses":["fd00:1122:3344:10b::3"],"dataset":null,"services":[{"id":"e9826cdc-6d3a-4eff-b1b5-ec4364ebe6b9","details":{"type":"oximeter","address":"[fd00:1122:3344:10b::3]:12223"}}]},"root":"/pool/ext/7c204111-31df-4c32-9a3e-780411f700fd/crypt/zone"},{"zone":{"id":"b0cde4a8-f27c-46e8-8355-756be9045afc","zone_type":"crucible","addresses":["fd00:1122:3344:10b::b"],"dataset":{"id":"b0cde4a8-f27c-46e8-8355-756be9045afc","name":{"pool_name":"oxp_07c1a8e7-51f5-4f12-a43d-734719fef92b","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::b]:32345"},"services":[{"id":"b0cde4a8-f27c-46e8-8355-756be9045afc","details":{"type":"crucible","address":"[fd00:1122:3344:10b::b]:32345"}}]},"root":"/pool/ext/1f6adf64-c9b9-4ed7-b3e2-37fb25624646/crypt/zone"},{"zone":{"id":"e2f70cf6-e285-4212-9b01-77ebf2ca9219","zone_type":"crucible","addresses":["fd00:1122:3344:10b::d"],"dataset":{"id":"e2f70cf6-e285-4212-9b01-77ebf2ca9219","name":{"pool_name":"oxp_a809f28a-7f25-4362-bc56-0cbdd72af2cb","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::d]:32345"},"services":[{"id":"e2f70cf6-e285-4212-9b01-77ebf2ca9219","details":{"type":"crucible","address":"[fd00:1122:3344:10b::d]:32345"}}]},"root":"/pool/ext/92a1bd39-6e8a-4226-b9d0-e3e8a9b8504f/crypt/zone"},{"zone":{"id":"b0949c9d-4aa1-4bc4-9cb3-5875b9166885","zone_type":"crucible","addresses":["fd00:1122:3344:10b::a"],"dataset":{"id":"b0949c9d-4aa1-4bc4-9cb3-5875b9166885","name":{"pool_name":"oxp_af0cc12b-43c5-473a-89a7-28351fbbb430","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::a]:32345"},"services":[{"id":"b0949c9d-4aa1-4bc4-9cb3-5875b9166885","details":{"type":"crucible","address":"[fd00:1122:3344:10b::a]:32345"}}]},"root":"/pool/ext/cf1594ed-7c0c-467c-b0af-a689dcb427a3/crypt/zone"},{"zone":{"id":"7cea4d59-a8ca-4826-901d-8d5bd935dc09","zone_type":"crucible","addresses":["fd00:1122:3344:10b::9"],"dataset":{"id":"7cea4d59-a8ca-4826-901d-8d5bd935dc09","name":{"pool_name":"oxp_d75dae09-4992-4a61-ab7d-5ae1d2b068ba","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::9]:32345"},"services":[{"id":"7cea4d59-a8ca-4826-901d-8d5bd935dc09","details":{"type":"crucible","address":"[fd00:1122:3344:10b::9]:32345"}}]},"root":"/pool/ext/a809f28a-7f25-4362-bc56-0cbdd72af2cb/crypt/zone"},{"zone":{"id":"08adaeee-c3b5-4cd8-8fbd-ac371b3101c9","zone_type":"crucible","addresses":["fd00:1122:3344:10b::4"],"dataset":{"id":"08adaeee-c3b5-4cd8-8fbd-ac371b3101c9","name":{"pool_name":"oxp_d9f23187-fbf9-4ea5-a103-bc112263a9a7","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::4]:32345"},"services":[{"id":"08adaeee-c3b5-4cd8-8fbd-ac371b3101c9","details":{"type":"crucible","address":"[fd00:1122:3344:10b::4]:32345"}}]},"root":"/pool/ext/7c204111-31df-4c32-9a3e-780411f700fd/crypt/zone"},{"zone":{"id":"3da1ade5-3fcb-4e64-aa08-81ee8a9ef723","zone_type":"crucible","addresses":["fd00:1122:3344:10b::8"],"dataset":{"id":"
3da1ade5-3fcb-4e64-aa08-81ee8a9ef723","name":{"pool_name":"oxp_1f6adf64-c9b9-4ed7-b3e2-37fb25624646","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::8]:32345"},"services":[{"id":"3da1ade5-3fcb-4e64-aa08-81ee8a9ef723","details":{"type":"crucible","address":"[fd00:1122:3344:10b::8]:32345"}}]},"root":"/pool/ext/07c1a8e7-51f5-4f12-a43d-734719fef92b/crypt/zone"},{"zone":{"id":"816f26a7-4c28-4a39-b9ad-a036678520ab","zone_type":"crucible","addresses":["fd00:1122:3344:10b::7"],"dataset":{"id":"816f26a7-4c28-4a39-b9ad-a036678520ab","name":{"pool_name":"oxp_92a1bd39-6e8a-4226-b9d0-e3e8a9b8504f","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::7]:32345"},"services":[{"id":"816f26a7-4c28-4a39-b9ad-a036678520ab","details":{"type":"crucible","address":"[fd00:1122:3344:10b::7]:32345"}}]},"root":"/pool/ext/d9f23187-fbf9-4ea5-a103-bc112263a9a7/crypt/zone"},{"zone":{"id":"839f9839-409f-45d3-b8a6-7085507b90f6","zone_type":"crucible","addresses":["fd00:1122:3344:10b::c"],"dataset":{"id":"839f9839-409f-45d3-b8a6-7085507b90f6","name":{"pool_name":"oxp_7c204111-31df-4c32-9a3e-780411f700fd","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::c]:32345"},"services":[{"id":"839f9839-409f-45d3-b8a6-7085507b90f6","details":{"type":"crucible","address":"[fd00:1122:3344:10b::c]:32345"}}]},"root":"/pool/ext/af0cc12b-43c5-473a-89a7-28351fbbb430/crypt/zone"},{"zone":{"id":"c717c81f-a228-4412-a34e-90f8c491d847","zone_type":"crucible","addresses":["fd00:1122:3344:10b::5"],"dataset":{"id":"c717c81f-a228-4412-a34e-90f8c491d847","name":{"pool_name":"oxp_cf1594ed-7c0c-467c-b0af-a689dcb427a3","kind":{"type":"crucible"}},"service_address":"[fd00:1122:3344:10b::5]:32345"},"services":[{"id":"c717c81f-a228-4412-a34e-90f8c491d847","details":{"type":"crucible","address":"[fd00:1122:3344:10b::5]:32345"}}]},"root":"/pool/ext/d63a297d-ae6a-4072-9dca-dda404044989/crypt/zone"},{"zone":{"id":"e1fa2023-6c86-40a4-ae59-a0de112cf7a9","zone_type":"ntp","addresses":["fd00:1122:3344:10b::e"],"dataset":null,"services":[{"id":"e1fa2023-6c86-40a4-ae59-a0de112cf7a9","details":{"type":"internal_ntp","address":"[fd00:1122:3344:10b::e]:123","ntp_servers":["440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal","cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal"],"dns_servers":["fd00:1122:3344:1::1","fd00:1122:3344:2::1","fd00:1122:3344:3::1"],"domain":null}}]},"root":"/pool/ext/d9f23187-fbf9-4ea5-a103-bc112263a9a7/crypt/zone"}]} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled10.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled10.json new file mode 100644 index 0000000000..c00a65e8ea --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled10.json @@ -0,0 +1,195 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "04eef8aa-055c-42ab-bdb6-c982f63c9be0", + "underlay_address": "fd00:1122:3344:107::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::d]:32345", + "dataset": { + "pool_name": "oxp_845ff39a-3205-416f-8bda-e35829107c8a" + } + } + }, + "root": "/pool/ext/43efdd6d-7419-437a-a282-fc45bfafd042/crypt/zone" + }, + { + "zone": { + "id": "8568c997-fbbb-46a8-8549-b78284530ffc", + "underlay_address": "fd00:1122:3344:107::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::5]:32345", + "dataset": { + "pool_name": "oxp_0e485ad3-04e6-404b-b619-87d4fea9f5ae" + } + } + }, + "root": 
"/pool/ext/9b61d4b2-66f6-459f-86f4-13d0b8c5d6cf/crypt/zone" + }, + { + "zone": { + "id": "6cec1d60-5c1a-4c1b-9632-2b4bc76bd37c", + "underlay_address": "fd00:1122:3344:107::e", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::e]:32345", + "dataset": { + "pool_name": "oxp_62a4c68a-2073-42d0-8e49-01f5e8b90cd4" + } + } + }, + "root": "/pool/ext/845ff39a-3205-416f-8bda-e35829107c8a/crypt/zone" + }, + { + "zone": { + "id": "aa646c82-c6d7-4d0c-8401-150130927759", + "underlay_address": "fd00:1122:3344:107::4", + "zone_type": { + "type": "clickhouse", + "address": "[fd00:1122:3344:107::4]:8123", + "dataset": { + "pool_name": "oxp_0e485ad3-04e6-404b-b619-87d4fea9f5ae" + } + } + }, + "root": "/pool/ext/fd82dcc7-00dd-4d01-826a-937a7d8238fb/crypt/zone" + }, + { + "zone": { + "id": "2f294ca1-7a4f-468f-8966-2b7915804729", + "underlay_address": "fd00:1122:3344:107::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::7]:32345", + "dataset": { + "pool_name": "oxp_43efdd6d-7419-437a-a282-fc45bfafd042" + } + } + }, + "root": "/pool/ext/fd82dcc7-00dd-4d01-826a-937a7d8238fb/crypt/zone" + }, + { + "zone": { + "id": "1a77bd1d-4fd4-4d6c-a105-17f942d94ba6", + "underlay_address": "fd00:1122:3344:107::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::c]:32345", + "dataset": { + "pool_name": "oxp_b6bdfdaf-9c0d-4b74-926c-49ff3ed05562" + } + } + }, + "root": "/pool/ext/9b61d4b2-66f6-459f-86f4-13d0b8c5d6cf/crypt/zone" + }, + { + "zone": { + "id": "f65a6668-1aea-4deb-81ed-191fbe469328", + "underlay_address": "fd00:1122:3344:107::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::9]:32345", + "dataset": { + "pool_name": "oxp_9b61d4b2-66f6-459f-86f4-13d0b8c5d6cf" + } + } + }, + "root": "/pool/ext/d0584f4a-20ba-436d-a75b-7709e80deb79/crypt/zone" + }, + { + "zone": { + "id": "ee8bce67-8f8e-4221-97b0-85f1860d66d0", + "underlay_address": "fd00:1122:3344:107::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::8]:32345", + "dataset": { + "pool_name": "oxp_b252b176-3974-436a-915b-60382b21eb76" + } + } + }, + "root": "/pool/ext/b6bdfdaf-9c0d-4b74-926c-49ff3ed05562/crypt/zone" + }, + { + "zone": { + "id": "cf3b2d54-5e36-4c93-b44f-8bf36ac98071", + "underlay_address": "fd00:1122:3344:107::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::b]:32345", + "dataset": { + "pool_name": "oxp_d0584f4a-20ba-436d-a75b-7709e80deb79" + } + } + }, + "root": "/pool/ext/4c157f35-865d-4310-9d81-c6259cb69293/crypt/zone" + }, + { + "zone": { + "id": "5c8c244c-00dc-4b16-aa17-6d9eb4827fab", + "underlay_address": "fd00:1122:3344:107::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::a]:32345", + "dataset": { + "pool_name": "oxp_4c157f35-865d-4310-9d81-c6259cb69293" + } + } + }, + "root": "/pool/ext/845ff39a-3205-416f-8bda-e35829107c8a/crypt/zone" + }, + { + "zone": { + "id": "7d5e942b-926c-442d-937a-76cc4aa72bf3", + "underlay_address": "fd00:1122:3344:107::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::6]:32345", + "dataset": { + "pool_name": "oxp_fd82dcc7-00dd-4d01-826a-937a7d8238fb" + } + } + }, + "root": "/pool/ext/b252b176-3974-436a-915b-60382b21eb76/crypt/zone" + }, + { + "zone": { + "id": "a3628a56-6f85-43b5-be50-71d8f0e04877", + "underlay_address": "fd00:1122:3344:107::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:107::3]:32221", + "dataset": { + "pool_name": "oxp_0e485ad3-04e6-404b-b619-87d4fea9f5ae" + 
} + } + }, + "root": "/pool/ext/4c157f35-865d-4310-9d81-c6259cb69293/crypt/zone" + }, + { + "zone": { + "id": "7529be1c-ca8b-441a-89aa-37166cc450df", + "underlay_address": "fd00:1122:3344:107::f", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:107::f]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/fd82dcc7-00dd-4d01-826a-937a7d8238fb/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled11.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled11.json new file mode 100644 index 0000000000..79aae3e8c1 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled11.json @@ -0,0 +1,196 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "605be8b9-c652-4a5f-94ca-068ec7a39472", + "underlay_address": "fd00:1122:3344:106::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::a]:32345", + "dataset": { + "pool_name": "oxp_cf14d1b9-b4db-4594-b3ab-a9957e770ce9" + } + } + }, + "root": "/pool/ext/cf5f8849-0c5a-475b-8683-6d17da88d1d1/crypt/zone" + }, + { + "zone": { + "id": "af8a8712-457c-4ea7-a8b6-aecb04761c1b", + "underlay_address": "fd00:1122:3344:106::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::9]:32345", + "dataset": { + "pool_name": "oxp_cf5f8849-0c5a-475b-8683-6d17da88d1d1" + } + } + }, + "root": "/pool/ext/7f778610-7328-4554-98f6-b17f74f551c7/crypt/zone" + }, + { + "zone": { + "id": "0022703b-dcfc-44d4-897a-b42f6f53b433", + "underlay_address": "fd00:1122:3344:106::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::c]:32345", + "dataset": { + "pool_name": "oxp_025725fa-9e40-4b46-b018-c420408394ef" + } + } + }, + "root": "/pool/ext/025725fa-9e40-4b46-b018-c420408394ef/crypt/zone" + }, + { + "zone": { + "id": "fffddf56-10ca-4b62-9be3-5b3764a5f682", + "underlay_address": "fd00:1122:3344:106::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::d]:32345", + "dataset": { + "pool_name": "oxp_4d2f5aaf-eb14-4b1e-aa99-ae38ec844605" + } + } + }, + "root": "/pool/ext/834c9aad-c53b-4357-bc3f-f422efa63848/crypt/zone" + }, + { + "zone": { + "id": "9b8194ee-917d-4abc-a55c-94cea6cdaea1", + "underlay_address": "fd00:1122:3344:106::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::6]:32345", + "dataset": { + "pool_name": "oxp_d7665e0d-9354-4341-a76f-965d7c49f277" + } + } + }, + "root": "/pool/ext/cf5f8849-0c5a-475b-8683-6d17da88d1d1/crypt/zone" + }, + { + "zone": { + "id": "b369e133-485c-4d98-8fee-83542d1fd94d", + "underlay_address": "fd00:1122:3344:106::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::4]:32345", + "dataset": { + "pool_name": "oxp_4366f80d-3902-4b93-8f2d-380008e805fc" + } + } + }, + "root": "/pool/ext/025725fa-9e40-4b46-b018-c420408394ef/crypt/zone" + }, + { + "zone": { + "id": "edd99650-5df1-4241-815d-253e4ef2399c", + "underlay_address": "fd00:1122:3344:106::3", + "zone_type": { + "type": "external_dns", + "dataset": { + "pool_name": "oxp_4366f80d-3902-4b93-8f2d-380008e805fc" + }, + "http_address": "[fd00:1122:3344:106::3]:5353", + "dns_address": "172.20.26.1:53", + "nic": { + "id": 
"99b759fc-8e2e-44b7-aca8-93c3b201974d", + "kind": { + "type": "service", + "id": "edd99650-5df1-4241-815d-253e4ef2399c" + }, + "name": "external-dns-edd99650-5df1-4241-815d-253e4ef2399c", + "ip": "172.30.1.5", + "mac": "A8:40:25:FF:B0:9C", + "subnet": "172.30.1.0/24", + "vni": 100, + "primary": true, + "slot": 0 + } + } + }, + "root": "/pool/ext/7f778610-7328-4554-98f6-b17f74f551c7/crypt/zone" + }, + { + "zone": { + "id": "46d1afcc-cc3f-4b17-aafc-054dd4862d15", + "underlay_address": "fd00:1122:3344:106::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::5]:32345", + "dataset": { + "pool_name": "oxp_7f778610-7328-4554-98f6-b17f74f551c7" + } + } + }, + "root": "/pool/ext/cf5f8849-0c5a-475b-8683-6d17da88d1d1/crypt/zone" + }, + { + "zone": { + "id": "12afe1c3-bfe6-4278-8240-91d401347d36", + "underlay_address": "fd00:1122:3344:106::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::8]:32345", + "dataset": { + "pool_name": "oxp_534bcd4b-502f-4109-af6e-4b28a22c20f1" + } + } + }, + "root": "/pool/ext/4366f80d-3902-4b93-8f2d-380008e805fc/crypt/zone" + }, + { + "zone": { + "id": "c33b5912-9985-43ed-98f2-41297e2b796a", + "underlay_address": "fd00:1122:3344:106::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::b]:32345", + "dataset": { + "pool_name": "oxp_834c9aad-c53b-4357-bc3f-f422efa63848" + } + } + }, + "root": "/pool/ext/d7665e0d-9354-4341-a76f-965d7c49f277/crypt/zone" + }, + { + "zone": { + "id": "65b3db59-9361-4100-9cee-04e32a8c67d3", + "underlay_address": "fd00:1122:3344:106::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::7]:32345", + "dataset": { + "pool_name": "oxp_32b5303f-f667-4345-84d2-c7eec63b91b2" + } + } + }, + "root": "/pool/ext/d7665e0d-9354-4341-a76f-965d7c49f277/crypt/zone" + }, + { + "zone": { + "id": "82500cc9-f33d-4d59-9e6e-d70ea6133077", + "underlay_address": "fd00:1122:3344:106::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:106::e]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/cf14d1b9-b4db-4594-b3ab-a9957e770ce9/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled12.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled12.json new file mode 100644 index 0000000000..39ebad3183 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled12.json @@ -0,0 +1,232 @@ +{ + "omicron_generation": 2, + "ledger_generation": 5, + "zones": [ + { + "zone": { + "id": "a76b3357-b690-43b8-8352-3300568ffc2b", + "underlay_address": "fd00:1122:3344:104::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::a]:32345", + "dataset": { + "pool_name": "oxp_05715ad8-59a1-44ab-ad5f-0cdffb46baab" + } + } + }, + "root": "/pool/ext/2ec2a731-3340-4777-b1bb-4a906c598174/crypt/zone" + }, + { + "zone": { + "id": "8d202759-ca06-4383-b50f-7f3ec4062bf7", + "underlay_address": "fd00:1122:3344:104::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::4]:32345", + "dataset": { + "pool_name": "oxp_56e32a8f-0877-4437-9cab-94a4928b1495" + } + } + }, + "root": "/pool/ext/613b58fc-5a80-42dc-a61c-b143cf220fb5/crypt/zone" + }, + { + "zone": { + "id": 
"fcdda266-fc6a-4518-89db-aec007a4b682", + "underlay_address": "fd00:1122:3344:104::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::b]:32345", + "dataset": { + "pool_name": "oxp_7e1293ad-b903-4054-aeae-2182d5e4a785" + } + } + }, + "root": "/pool/ext/416fd29e-d3b5-4fdf-8101-d0d163fa0706/crypt/zone" + }, + { + "zone": { + "id": "167cf6a2-ec51-4de2-bc6c-7785bbc0e436", + "underlay_address": "fd00:1122:3344:104::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::c]:32345", + "dataset": { + "pool_name": "oxp_f96c8d49-fdf7-4bd6-84f6-c282202d1abc" + } + } + }, + "root": "/pool/ext/56e32a8f-0877-4437-9cab-94a4928b1495/crypt/zone" + }, + { + "zone": { + "id": "c6fde82d-8dae-4ef0-b557-6c3d094d9454", + "underlay_address": "fd00:1122:3344:104::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::9]:32345", + "dataset": { + "pool_name": "oxp_416fd29e-d3b5-4fdf-8101-d0d163fa0706" + } + } + }, + "root": "/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone" + }, + { + "zone": { + "id": "650f5da7-86a0-4ade-af0f-bc96e021ded0", + "underlay_address": "fd00:1122:3344:104::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::5]:32345", + "dataset": { + "pool_name": "oxp_b4a71d3d-1ecd-418a-9a52-8d118f82082b" + } + } + }, + "root": "/pool/ext/613b58fc-5a80-42dc-a61c-b143cf220fb5/crypt/zone" + }, + { + "zone": { + "id": "7ce9a2c5-2d37-4188-b7b5-a9db819396c3", + "underlay_address": "fd00:1122:3344:104::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::d]:32345", + "dataset": { + "pool_name": "oxp_c87d16b8-e814-4159-8562-f8d7fdd19d13" + } + } + }, + "root": "/pool/ext/416fd29e-d3b5-4fdf-8101-d0d163fa0706/crypt/zone" + }, + { + "zone": { + "id": "23e1cf01-70ab-422f-997b-6216158965c3", + "underlay_address": "fd00:1122:3344:104::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::8]:32345", + "dataset": { + "pool_name": "oxp_3af01cc4-1f16-47d9-a489-abafcb91c2db" + } + } + }, + "root": "/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone" + }, + { + "zone": { + "id": "50209816-89fb-48ed-9595-16899d114844", + "underlay_address": "fd00:1122:3344:104::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::6]:32345", + "dataset": { + "pool_name": "oxp_2ec2a731-3340-4777-b1bb-4a906c598174" + } + } + }, + "root": "/pool/ext/416fd29e-d3b5-4fdf-8101-d0d163fa0706/crypt/zone" + }, + { + "zone": { + "id": "20b100d0-84c3-4119-aa9b-0c632b0b6a3a", + "underlay_address": "fd00:1122:3344:104::3", + "zone_type": { + "type": "nexus", + "internal_address": "[fd00:1122:3344:104::3]:12221", + "external_ip": "172.20.26.4", + "nic": { + "id": "364b0ecd-bf08-4cac-a993-bbf4a70564c7", + "kind": { + "type": "service", + "id": "20b100d0-84c3-4119-aa9b-0c632b0b6a3a" + }, + "name": "nexus-20b100d0-84c3-4119-aa9b-0c632b0b6a3a", + "ip": "172.30.2.6", + "mac": "A8:40:25:FF:B4:C1", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ] + } + }, + "root": "/pool/ext/c87d16b8-e814-4159-8562-f8d7fdd19d13/crypt/zone" + }, + { + "zone": { + "id": "8bc0f29e-0c20-437e-b8ca-7b9844acda22", + "underlay_address": "fd00:1122:3344:104::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::7]:32345", + "dataset": { + "pool_name": "oxp_613b58fc-5a80-42dc-a61c-b143cf220fb5" + } + } + }, + "root": 
"/pool/ext/56e32a8f-0877-4437-9cab-94a4928b1495/crypt/zone" + }, + { + "zone": { + "id": "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55", + "underlay_address": "fd00:1122:3344:104::e", + "zone_type": { + "type": "boundary_ntp", + "address": "[fd00:1122:3344:104::e]:123", + "ntp_servers": [ + "ntp.eng.oxide.computer" + ], + "dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ], + "domain": null, + "nic": { + "id": "a4b9bacf-6c04-431a-81ad-9bf0302af96e", + "kind": { + "type": "service", + "id": "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55" + }, + "name": "ntp-c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55", + "ip": "172.30.3.5", + "mac": "A8:40:25:FF:B2:52", + "subnet": "172.30.3.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "snat_cfg": { + "ip": "172.20.26.6", + "first_port": 0, + "last_port": 16383 + } + } + }, + "root": "/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone" + }, + { + "zone": { + "id": "51c9ad09-7814-4643-8ad4-689ccbe53fbd", + "underlay_address": "fd00:1122:3344:1::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_56e32a8f-0877-4437-9cab-94a4928b1495" + }, + "http_address": "[fd00:1122:3344:1::1]:5353", + "dns_address": "[fd00:1122:3344:1::1]:53", + "gz_address": "fd00:1122:3344:1::2", + "gz_address_index": 0 + } + }, + "root": "/pool/ext/3af01cc4-1f16-47d9-a489-abafcb91c2db/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled14.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled14.json new file mode 100644 index 0000000000..25dfb72a78 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled14.json @@ -0,0 +1,192 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "ee8b2cfa-87fe-46a6-98ef-23640b80a968", + "underlay_address": "fd00:1122:3344:10b::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::d]:32345", + "dataset": { + "pool_name": "oxp_4a624324-003a-4255-98e8-546a90b5b7fa" + } + } + }, + "root": "/pool/ext/6b9ec5f1-859f-459c-9c06-6a51ba87786f/crypt/zone" + }, + { + "zone": { + "id": "9228f8ca-2a83-439f-9cb7-f2801b5fea27", + "underlay_address": "fd00:1122:3344:10b::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::6]:32345", + "dataset": { + "pool_name": "oxp_6b9ec5f1-859f-459c-9c06-6a51ba87786f" + } + } + }, + "root": "/pool/ext/6b9ec5f1-859f-459c-9c06-6a51ba87786f/crypt/zone" + }, + { + "zone": { + "id": "ee44cdde-7ac9-4469-9f1d-e8bcfeb5cc46", + "underlay_address": "fd00:1122:3344:10b::e", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::e]:32345", + "dataset": { + "pool_name": "oxp_11b02ce7-7e50-486f-86c2-de8af9575a45" + } + } + }, + "root": "/pool/ext/11b02ce7-7e50-486f-86c2-de8af9575a45/crypt/zone" + }, + { + "zone": { + "id": "96bac0b1-8b34-4c81-9e76-6404d2c37630", + "underlay_address": "fd00:1122:3344:10b::4", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:10b::4]:17000" + } + }, + "root": "/pool/ext/350b2814-7b7f-40f1-9bf6-9818a1ef49bb/crypt/zone" + }, + { + "zone": { + "id": "d4e1e554-7b98-4413-809e-4a42561c3d0c", + "underlay_address": "fd00:1122:3344:10b::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::a]:32345", + "dataset": { + "pool_name": "oxp_e6d2fe1d-c74d-40cd-8fae-bc7d06bdaac8" + } + } + }, + "root": "/pool/ext/6b9ec5f1-859f-459c-9c06-6a51ba87786f/crypt/zone" + }, + { + "zone": { + "id": "1dd69b02-a032-46c3-8e2a-5012e8314455", + "underlay_address": 
"fd00:1122:3344:10b::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::b]:32345", + "dataset": { + "pool_name": "oxp_350b2814-7b7f-40f1-9bf6-9818a1ef49bb" + } + } + }, + "root": "/pool/ext/350b2814-7b7f-40f1-9bf6-9818a1ef49bb/crypt/zone" + }, + { + "zone": { + "id": "921f7752-d2f3-40df-a739-5cb1390abc2c", + "underlay_address": "fd00:1122:3344:10b::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::8]:32345", + "dataset": { + "pool_name": "oxp_2d1ebe24-6deb-4f81-8450-6842de28126c" + } + } + }, + "root": "/pool/ext/91ea7bb6-2be7-4498-9b0d-a0521509ec00/crypt/zone" + }, + { + "zone": { + "id": "609b25e8-9750-4308-ae6f-7202907a3675", + "underlay_address": "fd00:1122:3344:10b::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::9]:32345", + "dataset": { + "pool_name": "oxp_91ea7bb6-2be7-4498-9b0d-a0521509ec00" + } + } + }, + "root": "/pool/ext/2d1ebe24-6deb-4f81-8450-6842de28126c/crypt/zone" + }, + { + "zone": { + "id": "a232eba2-e94f-4592-a5a6-ec23f9be3296", + "underlay_address": "fd00:1122:3344:10b::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::5]:32345", + "dataset": { + "pool_name": "oxp_e12f29b8-1ab8-431e-bc96-1c1298947980" + } + } + }, + "root": "/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/zone" + }, + { + "zone": { + "id": "800d1758-9312-4b1a-8f02-dc6d644c2a9b", + "underlay_address": "fd00:1122:3344:10b::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::c]:32345", + "dataset": { + "pool_name": "oxp_b6932bb0-bab8-4876-914a-9c75a600e794" + } + } + }, + "root": "/pool/ext/b6932bb0-bab8-4876-914a-9c75a600e794/crypt/zone" + }, + { + "zone": { + "id": "668a4d4a-96dc-4b45-866b-bed3d64c26ec", + "underlay_address": "fd00:1122:3344:10b::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::7]:32345", + "dataset": { + "pool_name": "oxp_021afd19-2f87-4def-9284-ab7add1dd6ae" + } + } + }, + "root": "/pool/ext/91ea7bb6-2be7-4498-9b0d-a0521509ec00/crypt/zone" + }, + { + "zone": { + "id": "8bbea076-ff60-4330-8302-383e18140ef3", + "underlay_address": "fd00:1122:3344:10b::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:10b::3]:32221", + "dataset": { + "pool_name": "oxp_e12f29b8-1ab8-431e-bc96-1c1298947980" + } + } + }, + "root": "/pool/ext/4a624324-003a-4255-98e8-546a90b5b7fa/crypt/zone" + }, + { + "zone": { + "id": "3ccea933-89f2-4ce5-8367-efb0afeffe97", + "underlay_address": "fd00:1122:3344:10b::f", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10b::f]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/4a624324-003a-4255-98e8-546a90b5b7fa/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled16.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled16.json new file mode 100644 index 0000000000..905742e678 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled16.json @@ -0,0 +1,192 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "b12aa520-a769-4eac-b56b-09960550a831", + "underlay_address": "fd00:1122:3344:108::7", + "zone_type": { + "type": "crucible", + "address": 
"[fd00:1122:3344:108::7]:32345", + "dataset": { + "pool_name": "oxp_34dadf3f-f60c-4acc-b82b-4b0c82224222" + } + } + }, + "root": "/pool/ext/8be8c577-23ac-452e-a205-6d9c95088f61/crypt/zone" + }, + { + "zone": { + "id": "9bdc40ee-ccba-4d18-9efb-a30596e2d290", + "underlay_address": "fd00:1122:3344:108::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::d]:32345", + "dataset": { + "pool_name": "oxp_eb81728c-3b83-42fb-8133-ac32a0bdf70f" + } + } + }, + "root": "/pool/ext/8be8c577-23ac-452e-a205-6d9c95088f61/crypt/zone" + }, + { + "zone": { + "id": "c9a367c7-64d7-48e4-b484-9ecb4e8faea7", + "underlay_address": "fd00:1122:3344:108::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::9]:32345", + "dataset": { + "pool_name": "oxp_76ab5a67-e20f-4bf0-87b3-01fcc4144bd2" + } + } + }, + "root": "/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone" + }, + { + "zone": { + "id": "bc5124d8-65e8-4879-bfac-64d59003d482", + "underlay_address": "fd00:1122:3344:108::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::a]:32345", + "dataset": { + "pool_name": "oxp_5fac7a1d-e855-46e1-b8c2-dd848ac4fee6" + } + } + }, + "root": "/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone" + }, + { + "zone": { + "id": "5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a", + "underlay_address": "fd00:1122:3344:108::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::c]:32345", + "dataset": { + "pool_name": "oxp_0c4ef358-5533-43db-ad38-a8eff716e53a" + } + } + }, + "root": "/pool/ext/6d3e9cc6-f03b-4055-9785-05711d5e4fdc/crypt/zone" + }, + { + "zone": { + "id": "3b767edf-a72d-4d80-a0fc-65d6801ed0e0", + "underlay_address": "fd00:1122:3344:108::e", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::e]:32345", + "dataset": { + "pool_name": "oxp_f522118c-5dcd-4116-8044-07f0cceec52e" + } + } + }, + "root": "/pool/ext/5fac7a1d-e855-46e1-b8c2-dd848ac4fee6/crypt/zone" + }, + { + "zone": { + "id": "f3c02ed6-fbc5-45c3-a030-409f74b450fd", + "underlay_address": "fd00:1122:3344:108::4", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:108::4]:17000" + } + }, + "root": "/pool/ext/eb81728c-3b83-42fb-8133-ac32a0bdf70f/crypt/zone" + }, + { + "zone": { + "id": "85bd9bdb-1ec5-4a8d-badb-8b5d502546a1", + "underlay_address": "fd00:1122:3344:108::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::5]:32345", + "dataset": { + "pool_name": "oxp_416232c1-bc8f-403f-bacb-28403dd8fced" + } + } + }, + "root": "/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone" + }, + { + "zone": { + "id": "d2f1c3df-d4e0-4469-b50e-f1871da86ebf", + "underlay_address": "fd00:1122:3344:108::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::6]:32345", + "dataset": { + "pool_name": "oxp_6d3e9cc6-f03b-4055-9785-05711d5e4fdc" + } + } + }, + "root": "/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone" + }, + { + "zone": { + "id": "88fe3c12-4c55-47df-b4ee-ed26b795439d", + "underlay_address": "fd00:1122:3344:108::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::8]:32345", + "dataset": { + "pool_name": "oxp_8be8c577-23ac-452e-a205-6d9c95088f61" + } + } + }, + "root": "/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone" + }, + { + "zone": { + "id": "4d20175a-588b-44b8-8b9c-b16c6c3a97a0", + "underlay_address": "fd00:1122:3344:108::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::b]:32345", + "dataset": { + 
"pool_name": "oxp_a726cacd-fa35-4ed2-ade6-31ad928b24cb" + } + } + }, + "root": "/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone" + }, + { + "zone": { + "id": "e86845b5-eabd-49f5-9a10-6dfef9066209", + "underlay_address": "fd00:1122:3344:108::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:108::3]:32221", + "dataset": { + "pool_name": "oxp_416232c1-bc8f-403f-bacb-28403dd8fced" + } + } + }, + "root": "/pool/ext/416232c1-bc8f-403f-bacb-28403dd8fced/crypt/zone" + }, + { + "zone": { + "id": "209b6213-588b-43b6-a89b-19ee5c84ffba", + "underlay_address": "fd00:1122:3344:108::f", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:108::f]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/416232c1-bc8f-403f-bacb-28403dd8fced/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled17.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled17.json new file mode 100644 index 0000000000..1cccd0467b --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled17.json @@ -0,0 +1,181 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "90b53c3d-42fa-4ca9-bbfc-96fff245b508", + "underlay_address": "fd00:1122:3344:109::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::4]:32345", + "dataset": { + "pool_name": "oxp_ae56280b-17ce-4266-8573-e1da9db6c6bb" + } + } + }, + "root": "/pool/ext/b0e1a261-b932-47c4-81e9-1977275ae9d9/crypt/zone" + }, + { + "zone": { + "id": "4f9f2e1d-be04-4e8b-a50b-ffb18557a650", + "underlay_address": "fd00:1122:3344:109::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::5]:32345", + "dataset": { + "pool_name": "oxp_d5b07362-64db-4b18-a3e9-8d7cbabae2d5" + } + } + }, + "root": "/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone" + }, + { + "zone": { + "id": "2fa5671d-3109-4f11-ae70-1280f4fa3b89", + "underlay_address": "fd00:1122:3344:109::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::6]:32345", + "dataset": { + "pool_name": "oxp_9ba7bfbf-b9a2-4237-a142-94c1e68de984" + } + } + }, + "root": "/pool/ext/3cafbb47-c194-4a42-99ff-34dfeab999ed/crypt/zone" + }, + { + "zone": { + "id": "b63c6882-ca90-4156-b561-4781ab4a0962", + "underlay_address": "fd00:1122:3344:109::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::7]:32345", + "dataset": { + "pool_name": "oxp_b0e1a261-b932-47c4-81e9-1977275ae9d9" + } + } + }, + "root": "/pool/ext/d5b07362-64db-4b18-a3e9-8d7cbabae2d5/crypt/zone" + }, + { + "zone": { + "id": "f71344eb-f7e2-439d-82a0-9941e6868fb6", + "underlay_address": "fd00:1122:3344:109::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::9]:32345", + "dataset": { + "pool_name": "oxp_027a82e8-daa3-4fa6-8205-ed03445e1086" + } + } + }, + "root": "/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone" + }, + { + "zone": { + "id": "a60cf0d7-12d5-43cb-aa3f-7a9e84de08fb", + "underlay_address": "fd00:1122:3344:109::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::a]:32345", + "dataset": { + "pool_name": "oxp_8736aaf9-4d72-42b1-8e4f-07644d999c8b" + } + } + }, + "root": 
"/pool/ext/8736aaf9-4d72-42b1-8e4f-07644d999c8b/crypt/zone" + }, + { + "zone": { + "id": "5d0e03b2-8958-4c43-8851-bf819f102958", + "underlay_address": "fd00:1122:3344:109::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::8]:32345", + "dataset": { + "pool_name": "oxp_62426615-7832-49e7-9426-e39ffeb42c69" + } + } + }, + "root": "/pool/ext/07fc8ec9-1216-4d98-be34-c2970b585e61/crypt/zone" + }, + { + "zone": { + "id": "accc05a2-ec80-4856-a825-ec6b7f700eaa", + "underlay_address": "fd00:1122:3344:109::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::d]:32345", + "dataset": { + "pool_name": "oxp_dc083c53-7014-4482-8a79-f338ba2b0fb4" + } + } + }, + "root": "/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone" + }, + { + "zone": { + "id": "2e32fdcc-737a-4430-8290-cb7028ea4d50", + "underlay_address": "fd00:1122:3344:109::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::b]:32345", + "dataset": { + "pool_name": "oxp_3cafbb47-c194-4a42-99ff-34dfeab999ed" + } + } + }, + "root": "/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone" + }, + { + "zone": { + "id": "a97c6ae2-37f6-4d93-a66e-cb5cd3c6aaa2", + "underlay_address": "fd00:1122:3344:109::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::c]:32345", + "dataset": { + "pool_name": "oxp_07fc8ec9-1216-4d98-be34-c2970b585e61" + } + } + }, + "root": "/pool/ext/07fc8ec9-1216-4d98-be34-c2970b585e61/crypt/zone" + }, + { + "zone": { + "id": "3237a532-acaa-4ebe-bf11-dde794fea739", + "underlay_address": "fd00:1122:3344:109::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:109::3]:32221", + "dataset": { + "pool_name": "oxp_ae56280b-17ce-4266-8573-e1da9db6c6bb" + } + } + }, + "root": "/pool/ext/027a82e8-daa3-4fa6-8205-ed03445e1086/crypt/zone" + }, + { + "zone": { + "id": "83257100-5590-484a-b72a-a079389d8da6", + "underlay_address": "fd00:1122:3344:109::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:109::e]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/3cafbb47-c194-4a42-99ff-34dfeab999ed/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled21.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled21.json new file mode 100644 index 0000000000..35caa638e8 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled21.json @@ -0,0 +1,232 @@ +{ + "omicron_generation": 2, + "ledger_generation": 5, + "zones": [ + { + "zone": { + "id": "0437b69d-73a8-4231-86f9-6b5556e7e7ef", + "underlay_address": "fd00:1122:3344:102::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::5]:32345", + "dataset": { + "pool_name": "oxp_aa0ffe35-76db-42ab-adf2-ceb072bdf811" + } + } + }, + "root": "/pool/ext/0d2805da-6d24-4e57-a700-0c3865c05544/crypt/zone" + }, + { + "zone": { + "id": "47234ca5-305f-436a-9e9a-36bca9667680", + "underlay_address": "fd00:1122:3344:102::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::b]:32345", + "dataset": { + "pool_name": "oxp_0d2805da-6d24-4e57-a700-0c3865c05544" + } + } + }, + "root": "/pool/ext/160691d8-33a1-4d7d-a48a-c3fd27d76822/crypt/zone" + }, + { + "zone": { + "id": 
"2898657e-4141-4c05-851b-147bffc6bbbd", + "underlay_address": "fd00:1122:3344:102::3", + "zone_type": { + "type": "nexus", + "internal_address": "[fd00:1122:3344:102::3]:12221", + "external_ip": "172.20.26.5", + "nic": { + "id": "2e9a412e-c79a-48fe-8fa4-f5a6afed1040", + "kind": { + "type": "service", + "id": "2898657e-4141-4c05-851b-147bffc6bbbd" + }, + "name": "nexus-2898657e-4141-4c05-851b-147bffc6bbbd", + "ip": "172.30.2.7", + "mac": "A8:40:25:FF:C6:59", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ] + } + }, + "root": "/pool/ext/c0b4ecc1-a145-443f-90d1-2e8136b007bc/crypt/zone" + }, + { + "zone": { + "id": "cf98c4d6-4a7b-49c0-9b14-48a8adf52ce9", + "underlay_address": "fd00:1122:3344:102::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::c]:32345", + "dataset": { + "pool_name": "oxp_c0b4ecc1-a145-443f-90d1-2e8136b007bc" + } + } + }, + "root": "/pool/ext/f6acd70a-d6cb-464d-a460-dd5c60301562/crypt/zone" + }, + { + "zone": { + "id": "13c1e91e-bfcc-4eea-8185-412fc37fdea3", + "underlay_address": "fd00:1122:3344:102::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::9]:32345", + "dataset": { + "pool_name": "oxp_e9b0a2e4-8060-41bd-a3b5-d0642246d06d" + } + } + }, + "root": "/pool/ext/c0b4ecc1-a145-443f-90d1-2e8136b007bc/crypt/zone" + }, + { + "zone": { + "id": "c9cb60af-9e0e-4b3b-b971-53138a9b8d27", + "underlay_address": "fd00:1122:3344:102::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::4]:32345", + "dataset": { + "pool_name": "oxp_77749ec7-39a9-489d-904b-87f7223c4e3c" + } + } + }, + "root": "/pool/ext/77749ec7-39a9-489d-904b-87f7223c4e3c/crypt/zone" + }, + { + "zone": { + "id": "32995cfa-47ec-4b84-8514-7c1c8a86c19d", + "underlay_address": "fd00:1122:3344:102::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::8]:32345", + "dataset": { + "pool_name": "oxp_eac83f81-eb51-4f3e-874e-82f55dd952ba" + } + } + }, + "root": "/pool/ext/0d2805da-6d24-4e57-a700-0c3865c05544/crypt/zone" + }, + { + "zone": { + "id": "b93d2e2d-d54b-4503-85c3-9878e3cee9c7", + "underlay_address": "fd00:1122:3344:102::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::a]:32345", + "dataset": { + "pool_name": "oxp_160691d8-33a1-4d7d-a48a-c3fd27d76822" + } + } + }, + "root": "/pool/ext/138663ad-a382-4595-baf0-08f6b0276a67/crypt/zone" + }, + { + "zone": { + "id": "2ebbac4f-7b0f-43eb-99fd-dd6ff7f9e097", + "underlay_address": "fd00:1122:3344:102::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::6]:32345", + "dataset": { + "pool_name": "oxp_138663ad-a382-4595-baf0-08f6b0276a67" + } + } + }, + "root": "/pool/ext/e9b0a2e4-8060-41bd-a3b5-d0642246d06d/crypt/zone" + }, + { + "zone": { + "id": "d0eea3b2-e5ac-42bf-97b7-531b78fa06d1", + "underlay_address": "fd00:1122:3344:102::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::7]:32345", + "dataset": { + "pool_name": "oxp_69f0b863-f73f-42b2-9822-b2cb99f09003" + } + } + }, + "root": "/pool/ext/138663ad-a382-4595-baf0-08f6b0276a67/crypt/zone" + }, + { + "zone": { + "id": "2b34cd1d-ea7d-41a1-82b9-75550fdf6eb0", + "underlay_address": "fd00:1122:3344:102::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::d]:32345", + "dataset": { + "pool_name": "oxp_f6acd70a-d6cb-464d-a460-dd5c60301562" + } + } + }, + "root": 
"/pool/ext/c0b4ecc1-a145-443f-90d1-2e8136b007bc/crypt/zone" + }, + { + "zone": { + "id": "6ea2684c-115e-48a6-8453-ab52d1cecd73", + "underlay_address": "fd00:1122:3344:102::e", + "zone_type": { + "type": "boundary_ntp", + "address": "[fd00:1122:3344:102::e]:123", + "ntp_servers": [ + "ntp.eng.oxide.computer" + ], + "dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ], + "domain": null, + "nic": { + "id": "4effd079-ed4e-4cf6-8545-bb9574f516d2", + "kind": { + "type": "service", + "id": "6ea2684c-115e-48a6-8453-ab52d1cecd73" + }, + "name": "ntp-6ea2684c-115e-48a6-8453-ab52d1cecd73", + "ip": "172.30.3.6", + "mac": "A8:40:25:FF:A0:F9", + "subnet": "172.30.3.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "snat_cfg": { + "ip": "172.20.26.7", + "first_port": 16384, + "last_port": 32767 + } + } + }, + "root": "/pool/ext/aa0ffe35-76db-42ab-adf2-ceb072bdf811/crypt/zone" + }, + { + "zone": { + "id": "3a1ea15f-06a4-4afd-959a-c3a00b2bdd80", + "underlay_address": "fd00:1122:3344:2::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_77749ec7-39a9-489d-904b-87f7223c4e3c" + }, + "http_address": "[fd00:1122:3344:2::1]:5353", + "dns_address": "[fd00:1122:3344:2::1]:53", + "gz_address": "fd00:1122:3344:2::2", + "gz_address_index": 1 + } + }, + "root": "/pool/ext/69f0b863-f73f-42b2-9822-b2cb99f09003/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled23.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled23.json new file mode 100644 index 0000000000..94fcb3a327 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled23.json @@ -0,0 +1,195 @@ +{ + "omicron_generation": 2, + "ledger_generation": 5, + "zones": [ + { + "zone": { + "id": "1876cdcf-b2e7-4b79-ad2e-67df716e1860", + "underlay_address": "fd00:1122:3344:10a::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::8]:32345", + "dataset": { + "pool_name": "oxp_d4c6bdc6-5e99-4f6c-b57a-9bfcb9a76be4" + } + } + }, + "root": "/pool/ext/86c58ea3-1413-4af3-9aff-9c0a3d758459/crypt/zone" + }, + { + "zone": { + "id": "0e708ee3-b7a6-4993-a88a-4489add33e29", + "underlay_address": "fd00:1122:3344:10a::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::d]:32345", + "dataset": { + "pool_name": "oxp_718ad834-b415-4abb-934d-9f987cde0a96" + } + } + }, + "root": "/pool/ext/30f7d236-c835-46cc-bc27-9099a6826f67/crypt/zone" + }, + { + "zone": { + "id": "4e1b9a65-848f-4649-b360-1df0d135b44d", + "underlay_address": "fd00:1122:3344:10a::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::c]:32345", + "dataset": { + "pool_name": "oxp_88ee08c6-1c0f-44c2-9110-b8d5a7589ebb" + } + } + }, + "root": "/pool/ext/30f7d236-c835-46cc-bc27-9099a6826f67/crypt/zone" + }, + { + "zone": { + "id": "da510a57-3af1-4d2b-b2ed-2e8849f27d8b", + "underlay_address": "fd00:1122:3344:10a::3", + "zone_type": { + "type": "oximeter", + "address": "[fd00:1122:3344:10a::3]:12223" + } + }, + "root": "/pool/ext/718ad834-b415-4abb-934d-9f987cde0a96/crypt/zone" + }, + { + "zone": { + "id": "d4d9acc8-3e0b-4fab-a0a2-d21920fabd7e", + "underlay_address": "fd00:1122:3344:10a::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::6]:32345", + "dataset": { + "pool_name": "oxp_9dfe424f-cba6-4bfb-a3dd-e8bd7fdea57d" + } + } + }, + "root": "/pool/ext/30f7d236-c835-46cc-bc27-9099a6826f67/crypt/zone" + }, + { + "zone": { + "id": "fcb75972-836b-4f55-ba21-9722832cf5c2", + "underlay_address": "fd00:1122:3344:10a::7", 
+ "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::7]:32345", + "dataset": { + "pool_name": "oxp_9005671f-3d90-4ed1-be15-ad65b9a65bd5" + } + } + }, + "root": "/pool/ext/d4c6bdc6-5e99-4f6c-b57a-9bfcb9a76be4/crypt/zone" + }, + { + "zone": { + "id": "624beba0-7dcd-4d55-af05-4670c6fcb1fb", + "underlay_address": "fd00:1122:3344:10a::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::4]:32345", + "dataset": { + "pool_name": "oxp_93867156-a43d-4c03-a899-1535e566c8bd" + } + } + }, + "root": "/pool/ext/93867156-a43d-4c03-a899-1535e566c8bd/crypt/zone" + }, + { + "zone": { + "id": "26fb3830-898e-4086-afaf-8f9654716b8c", + "underlay_address": "fd00:1122:3344:10a::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::b]:32345", + "dataset": { + "pool_name": "oxp_86c58ea3-1413-4af3-9aff-9c0a3d758459" + } + } + }, + "root": "/pool/ext/93867156-a43d-4c03-a899-1535e566c8bd/crypt/zone" + }, + { + "zone": { + "id": "a3ef7eba-c08e-48ef-ae7a-89e2fcb49b66", + "underlay_address": "fd00:1122:3344:10a::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::a]:32345", + "dataset": { + "pool_name": "oxp_cd3fdbae-a9d9-4db7-866a-bca36f6dd634" + } + } + }, + "root": "/pool/ext/718ad834-b415-4abb-934d-9f987cde0a96/crypt/zone" + }, + { + "zone": { + "id": "5c1d4a02-f33b-433a-81f5-5c149e3433bd", + "underlay_address": "fd00:1122:3344:10a::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::5]:32345", + "dataset": { + "pool_name": "oxp_9adfc865-2eef-4880-a6e3-9d2f88c8efd0" + } + } + }, + "root": "/pool/ext/cd3fdbae-a9d9-4db7-866a-bca36f6dd634/crypt/zone" + }, + { + "zone": { + "id": "ee77efe9-81d0-4395-a237-15e30c2c2d04", + "underlay_address": "fd00:1122:3344:10a::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::9]:32345", + "dataset": { + "pool_name": "oxp_30f7d236-c835-46cc-bc27-9099a6826f67" + } + } + }, + "root": "/pool/ext/88ee08c6-1c0f-44c2-9110-b8d5a7589ebb/crypt/zone" + }, + { + "zone": { + "id": "71ab91b7-48d4-4d31-b47e-59f29f419116", + "underlay_address": "fd00:1122:3344:10a::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10a::e]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/cd3fdbae-a9d9-4db7-866a-bca36f6dd634/crypt/zone" + }, + { + "zone": { + "id": "46ccd8fe-4e3c-4307-97ae-1f7ac505082a", + "underlay_address": "fd00:1122:3344:3::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_93867156-a43d-4c03-a899-1535e566c8bd" + }, + "http_address": "[fd00:1122:3344:3::1]:5353", + "dns_address": "[fd00:1122:3344:3::1]:53", + "gz_address": "fd00:1122:3344:3::2", + "gz_address_index": 2 + } + }, + "root": "/pool/ext/9dfe424f-cba6-4bfb-a3dd-e8bd7fdea57d/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled25.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled25.json new file mode 100644 index 0000000000..09a07149cf --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled25.json @@ -0,0 +1,196 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "180d466d-eb36-4546-8922-e52c4c076823", + "underlay_address": 
"fd00:1122:3344:101::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::5]:32345", + "dataset": { + "pool_name": "oxp_ac789935-fa42-4d00-8967-df0d96dbb74e" + } + } + }, + "root": "/pool/ext/d732addc-cfe8-4c2c-8028-72eb4481b04e/crypt/zone" + }, + { + "zone": { + "id": "b5af0303-bc03-40a3-b733-0396d705dfbf", + "underlay_address": "fd00:1122:3344:101::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::7]:32345", + "dataset": { + "pool_name": "oxp_d732addc-cfe8-4c2c-8028-72eb4481b04e" + } + } + }, + "root": "/pool/ext/677b0057-3a80-461b-aca8-c2cb501a7278/crypt/zone" + }, + { + "zone": { + "id": "9c7c805a-f5ed-4e48-86e3-7aa81a718881", + "underlay_address": "fd00:1122:3344:101::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::c]:32345", + "dataset": { + "pool_name": "oxp_923c930c-80f8-448d-8321-cebfc6c41760" + } + } + }, + "root": "/pool/ext/ac789935-fa42-4d00-8967-df0d96dbb74e/crypt/zone" + }, + { + "zone": { + "id": "4e49c83c-2d4a-491a-91ac-4ab022026dcf", + "underlay_address": "fd00:1122:3344:101::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::4]:32345", + "dataset": { + "pool_name": "oxp_c99e6032-1d4f-47d2-9efe-ae2b2479554e" + } + } + }, + "root": "/pool/ext/653065d2-ab70-47c9-b832-34238fdc95ef/crypt/zone" + }, + { + "zone": { + "id": "0e38475e-b8b2-4813-bf80-3c170081081a", + "underlay_address": "fd00:1122:3344:101::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::d]:32345", + "dataset": { + "pool_name": "oxp_653065d2-ab70-47c9-b832-34238fdc95ef" + } + } + }, + "root": "/pool/ext/4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca/crypt/zone" + }, + { + "zone": { + "id": "75123e60-1116-4b8d-a466-7302220127da", + "underlay_address": "fd00:1122:3344:101::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::8]:32345", + "dataset": { + "pool_name": "oxp_c764a8ae-6862-4eec-9db0-cc6ea478e4a7" + } + } + }, + "root": "/pool/ext/c764a8ae-6862-4eec-9db0-cc6ea478e4a7/crypt/zone" + }, + { + "zone": { + "id": "fbd0379c-97fa-49ea-8980-17ae30ffff3c", + "underlay_address": "fd00:1122:3344:101::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::b]:32345", + "dataset": { + "pool_name": "oxp_fcb0e4c7-e046-4cf5-ad35-3ad90e1eb90c" + } + } + }, + "root": "/pool/ext/4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca/crypt/zone" + }, + { + "zone": { + "id": "ec635326-cd1d-4f73-b8e6-c3a36a7020db", + "underlay_address": "fd00:1122:3344:101::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::a]:32345", + "dataset": { + "pool_name": "oxp_6bfb4120-488d-4f3d-90ef-e9bfa523b388" + } + } + }, + "root": "/pool/ext/c99e6032-1d4f-47d2-9efe-ae2b2479554e/crypt/zone" + }, + { + "zone": { + "id": "f500d564-c40a-4eca-ac8a-a26b435f2037", + "underlay_address": "fd00:1122:3344:101::3", + "zone_type": { + "type": "external_dns", + "dataset": { + "pool_name": "oxp_c99e6032-1d4f-47d2-9efe-ae2b2479554e" + }, + "http_address": "[fd00:1122:3344:101::3]:5353", + "dns_address": "172.20.26.2:53", + "nic": { + "id": "b0b42776-3914-4a69-889f-4831dc72327c", + "kind": { + "type": "service", + "id": "f500d564-c40a-4eca-ac8a-a26b435f2037" + }, + "name": "external-dns-f500d564-c40a-4eca-ac8a-a26b435f2037", + "ip": "172.30.1.6", + "mac": "A8:40:25:FF:D0:B4", + "subnet": "172.30.1.0/24", + "vni": 100, + "primary": true, + "slot": 0 + } + } + }, + "root": "/pool/ext/ac789935-fa42-4d00-8967-df0d96dbb74e/crypt/zone" + }, + { + "zone": { + "id": 
"56d4dbcc-3b4a-4ed0-8795-7734aadcc4c0", + "underlay_address": "fd00:1122:3344:101::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::9]:32345", + "dataset": { + "pool_name": "oxp_4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca" + } + } + }, + "root": "/pool/ext/4c7ad252-55c2-4a1a-9d93-9dfcdfdfacca/crypt/zone" + }, + { + "zone": { + "id": "0d3a1bd5-f6fe-49cb-807a-190dabc90103", + "underlay_address": "fd00:1122:3344:101::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::6]:32345", + "dataset": { + "pool_name": "oxp_677b0057-3a80-461b-aca8-c2cb501a7278" + } + } + }, + "root": "/pool/ext/6bfb4120-488d-4f3d-90ef-e9bfa523b388/crypt/zone" + }, + { + "zone": { + "id": "d34c7184-5d4e-4cb5-8f91-df74a343ffbc", + "underlay_address": "fd00:1122:3344:101::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:101::e]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/ac789935-fa42-4d00-8967-df0d96dbb74e/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled8.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled8.json new file mode 100644 index 0000000000..669889b3c5 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled8.json @@ -0,0 +1,198 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "7153983f-8fd7-4fb9-92ac-0f07a07798b4", + "underlay_address": "fd00:1122:3344:103::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::a]:32345", + "dataset": { + "pool_name": "oxp_bf428719-1b16-4503-99f4-ad95846d916f" + } + } + }, + "root": "/pool/ext/26e698bb-006d-4208-94b9-d1bc279111fa/crypt/zone" + }, + { + "zone": { + "id": "7d44ba36-4a69-490a-bc40-f6f90a4208d4", + "underlay_address": "fd00:1122:3344:103::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::c]:32345", + "dataset": { + "pool_name": "oxp_414e235b-55c3-4dc1-a568-8adf4ea1a052" + } + } + }, + "root": "/pool/ext/cf940e15-dbc5-481b-866a-4de4b018898e/crypt/zone" + }, + { + "zone": { + "id": "65a11c18-7f59-41ac-b9e7-680627f996e7", + "underlay_address": "fd00:1122:3344:103::3", + "zone_type": { + "type": "nexus", + "internal_address": "[fd00:1122:3344:103::3]:12221", + "external_ip": "172.20.26.3", + "nic": { + "id": "a3e13dde-a2bc-4170-ad84-aad8085b6034", + "kind": { + "type": "service", + "id": "65a11c18-7f59-41ac-b9e7-680627f996e7" + }, + "name": "nexus-65a11c18-7f59-41ac-b9e7-680627f996e7", + "ip": "172.30.2.5", + "mac": "A8:40:25:FF:A6:83", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ] + } + }, + "root": "/pool/ext/e126ddcc-8bee-46ba-8199-2a74df0ba040/crypt/zone" + }, + { + "zone": { + "id": "072fdae8-2adf-4fd2-94ce-e9b0663b91e7", + "underlay_address": "fd00:1122:3344:103::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::b]:32345", + "dataset": { + "pool_name": "oxp_26e698bb-006d-4208-94b9-d1bc279111fa" + } + } + }, + "root": "/pool/ext/bf428719-1b16-4503-99f4-ad95846d916f/crypt/zone" + }, + { + "zone": { + "id": "01f93020-7e7d-4185-93fb-6ca234056c82", + "underlay_address": 
"fd00:1122:3344:103::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::5]:32345", + "dataset": { + "pool_name": "oxp_7b24095a-72df-45e3-984f-2b795e052ac7" + } + } + }, + "root": "/pool/ext/7b24095a-72df-45e3-984f-2b795e052ac7/crypt/zone" + }, + { + "zone": { + "id": "e238116d-e5cc-43d4-9c8a-6f138ae8a15d", + "underlay_address": "fd00:1122:3344:103::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::6]:32345", + "dataset": { + "pool_name": "oxp_e126ddcc-8bee-46ba-8199-2a74df0ba040" + } + } + }, + "root": "/pool/ext/7b24095a-72df-45e3-984f-2b795e052ac7/crypt/zone" + }, + { + "zone": { + "id": "585cd8c5-c41e-4be4-beb8-bfbef9b53856", + "underlay_address": "fd00:1122:3344:103::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::7]:32345", + "dataset": { + "pool_name": "oxp_6340805e-c5af-418d-8bd1-fc0085667f33" + } + } + }, + "root": "/pool/ext/414e235b-55c3-4dc1-a568-8adf4ea1a052/crypt/zone" + }, + { + "zone": { + "id": "0b41c560-3b20-42f4-82ad-92f5bb575d6b", + "underlay_address": "fd00:1122:3344:103::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::9]:32345", + "dataset": { + "pool_name": "oxp_b93f880e-c55b-4d6c-9a16-939d84b628fc" + } + } + }, + "root": "/pool/ext/6340805e-c5af-418d-8bd1-fc0085667f33/crypt/zone" + }, + { + "zone": { + "id": "0ccf27c0-e32d-4b52-a2c5-6db0c64a26f9", + "underlay_address": "fd00:1122:3344:103::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::d]:32345", + "dataset": { + "pool_name": "oxp_2115b084-be0f-4fba-941b-33a659798a9e" + } + } + }, + "root": "/pool/ext/414e235b-55c3-4dc1-a568-8adf4ea1a052/crypt/zone" + }, + { + "zone": { + "id": "a6ba8273-0320-4dab-b801-281f041b0c50", + "underlay_address": "fd00:1122:3344:103::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::4]:32345", + "dataset": { + "pool_name": "oxp_8a199f12-4f5c-483a-8aca-f97856658a35" + } + } + }, + "root": "/pool/ext/b93f880e-c55b-4d6c-9a16-939d84b628fc/crypt/zone" + }, + { + "zone": { + "id": "b9b7b4c2-284a-4ec1-80ea-75b7a43b71c4", + "underlay_address": "fd00:1122:3344:103::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::8]:32345", + "dataset": { + "pool_name": "oxp_cf940e15-dbc5-481b-866a-4de4b018898e" + } + } + }, + "root": "/pool/ext/cf940e15-dbc5-481b-866a-4de4b018898e/crypt/zone" + }, + { + "zone": { + "id": "7a85d50e-b524-41c1-a052-118027eb77db", + "underlay_address": "fd00:1122:3344:103::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:103::e]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/b93f880e-c55b-4d6c-9a16-939d84b628fc/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack2-sled9.json b/sled-agent/tests/output/new-zones-ledgers/rack2-sled9.json new file mode 100644 index 0000000000..d4a429f9b0 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack2-sled9.json @@ -0,0 +1,192 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "912346a2-d7e6-427e-b373-e8dcbe4fcea9", + "underlay_address": "fd00:1122:3344:105::5", + "zone_type": { + "type": "crucible", + "address": 
"[fd00:1122:3344:105::5]:32345", + "dataset": { + "pool_name": "oxp_b358fb1e-f52a-4a63-9aab-170225509b37" + } + } + }, + "root": "/pool/ext/0ae29053-29a2-489e-a1e6-6aec0ecd05f8/crypt/zone" + }, + { + "zone": { + "id": "3d420dff-c616-4c7d-bab1-0f9c2b5396bf", + "underlay_address": "fd00:1122:3344:105::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::a]:32345", + "dataset": { + "pool_name": "oxp_4eb2e4eb-41d8-496c-9a5a-687d7e004aa4" + } + } + }, + "root": "/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone" + }, + { + "zone": { + "id": "9c5d88c9-8ff1-4f23-9438-7b81322eaf68", + "underlay_address": "fd00:1122:3344:105::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::b]:32345", + "dataset": { + "pool_name": "oxp_aadf48eb-6ff0-40b5-a092-1fdd06c03e11" + } + } + }, + "root": "/pool/ext/4358f47f-f21e-4cc8-829e-0c7fc2400a59/crypt/zone" + }, + { + "zone": { + "id": "f9c1deca-1898-429e-8c93-254c7aa7bae6", + "underlay_address": "fd00:1122:3344:105::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::8]:32345", + "dataset": { + "pool_name": "oxp_d1cb6b7d-2b92-4b7d-8a4d-551987f0277e" + } + } + }, + "root": "/pool/ext/f8b11629-ced6-412a-9c3f-d169b99ee996/crypt/zone" + }, + { + "zone": { + "id": "ce8563f3-4a93-45ff-b727-cbfbee6aa413", + "underlay_address": "fd00:1122:3344:105::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::9]:32345", + "dataset": { + "pool_name": "oxp_4358f47f-f21e-4cc8-829e-0c7fc2400a59" + } + } + }, + "root": "/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone" + }, + { + "zone": { + "id": "9470ea7d-1920-4b4b-8fca-e7659a1ef733", + "underlay_address": "fd00:1122:3344:105::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::c]:32345", + "dataset": { + "pool_name": "oxp_17eff217-f0b1-4353-b133-0f68bbd5ceaa" + } + } + }, + "root": "/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone" + }, + { + "zone": { + "id": "375296e5-0a23-466c-b605-4204080f8103", + "underlay_address": "fd00:1122:3344:105::4", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:105::4]:17000" + } + }, + "root": "/pool/ext/4eb2e4eb-41d8-496c-9a5a-687d7e004aa4/crypt/zone" + }, + { + "zone": { + "id": "f9940969-b0e8-4e8c-86c7-4bc49cd15a5f", + "underlay_address": "fd00:1122:3344:105::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::7]:32345", + "dataset": { + "pool_name": "oxp_f8b11629-ced6-412a-9c3f-d169b99ee996" + } + } + }, + "root": "/pool/ext/17eff217-f0b1-4353-b133-0f68bbd5ceaa/crypt/zone" + }, + { + "zone": { + "id": "23dca27d-c79b-4930-a817-392e8aeaa4c1", + "underlay_address": "fd00:1122:3344:105::e", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::e]:32345", + "dataset": { + "pool_name": "oxp_57650e05-36ff-4de8-865f-b9562bdb67f5" + } + } + }, + "root": "/pool/ext/0ae29053-29a2-489e-a1e6-6aec0ecd05f8/crypt/zone" + }, + { + "zone": { + "id": "92d3e4e9-0768-4772-83c1-23cce52190e9", + "underlay_address": "fd00:1122:3344:105::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::6]:32345", + "dataset": { + "pool_name": "oxp_eb1234a5-fdf7-4977-94d5-2eef25ce56a1" + } + } + }, + "root": "/pool/ext/b358fb1e-f52a-4a63-9aab-170225509b37/crypt/zone" + }, + { + "zone": { + "id": "b3e9fee2-24d2-44e7-8539-a6918e85cf2b", + "underlay_address": "fd00:1122:3344:105::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::d]:32345", + "dataset": { + 
"pool_name": "oxp_0ae29053-29a2-489e-a1e6-6aec0ecd05f8" + } + } + }, + "root": "/pool/ext/eb1234a5-fdf7-4977-94d5-2eef25ce56a1/crypt/zone" + }, + { + "zone": { + "id": "4c3ef132-ec83-4b1b-9574-7c7d3035f9e9", + "underlay_address": "fd00:1122:3344:105::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:105::3]:32221", + "dataset": { + "pool_name": "oxp_b358fb1e-f52a-4a63-9aab-170225509b37" + } + } + }, + "root": "/pool/ext/d1cb6b7d-2b92-4b7d-8a4d-551987f0277e/crypt/zone" + }, + { + "zone": { + "id": "76b79b96-eaa2-4341-9aba-e77cfc92e0a9", + "underlay_address": "fd00:1122:3344:105::f", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:105::f]:123", + "ntp_servers": [ + "c3ec3d1a-3172-4d36-bfd3-f54a04d5ba55.host.control-plane.oxide.internal", + "6ea2684c-115e-48a6-8453-ab52d1cecd73.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/0ae29053-29a2-489e-a1e6-6aec0ecd05f8/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled0.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled0.json new file mode 100644 index 0000000000..db6c55f556 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled0.json @@ -0,0 +1,181 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "0710ecea-dbc4-417f-a6f7-1b97c3045db1", + "underlay_address": "fd00:1122:3344:116::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::6]:32345", + "dataset": { + "pool_name": "oxp_d5313ef5-019c-4c47-bc5e-63794107a1bb" + } + } + }, + "root": "/pool/ext/904e93a9-d175-4a20-9006-8c1e847aecf7/crypt/zone" + }, + { + "zone": { + "id": "28b29d14-d55f-4b55-bbc1-f66e46ae3e70", + "underlay_address": "fd00:1122:3344:116::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::9]:32345", + "dataset": { + "pool_name": "oxp_60755ffe-e9ee-4619-a751-8b3ea6405e67" + } + } + }, + "root": "/pool/ext/d5313ef5-019c-4c47-bc5e-63794107a1bb/crypt/zone" + }, + { + "zone": { + "id": "6f8f9fd2-b139-4069-a7e2-8d40efd58f6c", + "underlay_address": "fd00:1122:3344:116::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::d]:32345", + "dataset": { + "pool_name": "oxp_ccd2cb0b-782f-4026-a160-6d1192f04ca3" + } + } + }, + "root": "/pool/ext/d5313ef5-019c-4c47-bc5e-63794107a1bb/crypt/zone" + }, + { + "zone": { + "id": "450308ad-bf4d-40ff-ba62-f3290f7fffaf", + "underlay_address": "fd00:1122:3344:116::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::4]:32345", + "dataset": { + "pool_name": "oxp_46b09442-65ba-4d59-9121-9803fe3b724b" + } + } + }, + "root": "/pool/ext/54d901cc-f75e-417d-8a9f-24363136d0ef/crypt/zone" + }, + { + "zone": { + "id": "9a22bbaa-eab4-4a32-8546-9882dc029483", + "underlay_address": "fd00:1122:3344:116::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::8]:32345", + "dataset": { + "pool_name": "oxp_93e3f350-75a0-4af0-bdac-baf9b423926f" + } + } + }, + "root": "/pool/ext/d5313ef5-019c-4c47-bc5e-63794107a1bb/crypt/zone" + }, + { + "zone": { + "id": "63a9dc49-0b5b-4483-95ed-553b545dc202", + "underlay_address": "fd00:1122:3344:116::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::a]:32345", + "dataset": { + "pool_name": "oxp_e3532845-76c0-42a9-903b-a07f7992e937" + } + } + }, + "root": 
"/pool/ext/60755ffe-e9ee-4619-a751-8b3ea6405e67/crypt/zone" + }, + { + "zone": { + "id": "1fef5b6c-78e4-4ad9-9973-9d8c78f1e232", + "underlay_address": "fd00:1122:3344:116::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::7]:32345", + "dataset": { + "pool_name": "oxp_54d901cc-f75e-417d-8a9f-24363136d0ef" + } + } + }, + "root": "/pool/ext/90d7b6f9-3e28-48b0-86ac-0486728075cf/crypt/zone" + }, + { + "zone": { + "id": "b2aab21a-cccd-4aa9-977f-a32090e6eaa7", + "underlay_address": "fd00:1122:3344:116::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::5]:32345", + "dataset": { + "pool_name": "oxp_90d7b6f9-3e28-48b0-86ac-0486728075cf" + } + } + }, + "root": "/pool/ext/46b09442-65ba-4d59-9121-9803fe3b724b/crypt/zone" + }, + { + "zone": { + "id": "fc1bbf28-24f3-4c1f-b367-2bc8231eb7d4", + "underlay_address": "fd00:1122:3344:116::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::b]:32345", + "dataset": { + "pool_name": "oxp_0a7bb0d3-408b-42b1-8846-76cf106a9580" + } + } + }, + "root": "/pool/ext/e3532845-76c0-42a9-903b-a07f7992e937/crypt/zone" + }, + { + "zone": { + "id": "bcb7617a-f76a-4912-8ccc-802d2a697e3c", + "underlay_address": "fd00:1122:3344:116::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:116::c]:32345", + "dataset": { + "pool_name": "oxp_904e93a9-d175-4a20-9006-8c1e847aecf7" + } + } + }, + "root": "/pool/ext/ccd2cb0b-782f-4026-a160-6d1192f04ca3/crypt/zone" + }, + { + "zone": { + "id": "371fba3a-658b-469b-b675-c90cc0d39254", + "underlay_address": "fd00:1122:3344:116::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:116::3]:32221", + "dataset": { + "pool_name": "oxp_46b09442-65ba-4d59-9121-9803fe3b724b" + } + } + }, + "root": "/pool/ext/46b09442-65ba-4d59-9121-9803fe3b724b/crypt/zone" + }, + { + "zone": { + "id": "5a4d89f5-49e0-4566-a99c-342d1bb26b1c", + "underlay_address": "fd00:1122:3344:116::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:116::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/60755ffe-e9ee-4619-a751-8b3ea6405e67/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled1.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled1.json new file mode 100644 index 0000000000..ae3e3d8f4a --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled1.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "f401d06c-46fc-42f8-aa51-7515a51355ce", + "underlay_address": "fd00:1122:3344:11c::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::8]:32345", + "dataset": { + "pool_name": "oxp_8a88768a-2dd5-43b7-bd40-0db77be4d3a8" + } + } + }, + "root": "/pool/ext/19d23d27-6a33-4203-b8c1-4b0df4ac791f/crypt/zone" + }, + { + "zone": { + "id": "721c96ea-08d4-4c89-828f-600e7e344916", + "underlay_address": "fd00:1122:3344:11c::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::6]:32345", + "dataset": { + "pool_name": "oxp_15259003-fb04-4547-b4a9-b4511893c0fd" + } + } + }, + "root": "/pool/ext/d2a8ed82-22ef-46d8-ad40-e1cb2cecebee/crypt/zone" + }, + { + "zone": { + "id": 
"ca17bdf9-51c5-4e1e-b822-856609070ec6", + "underlay_address": "fd00:1122:3344:11c::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::5]:32345", + "dataset": { + "pool_name": "oxp_d2a8ed82-22ef-46d8-ad40-e1cb2cecebee" + } + } + }, + "root": "/pool/ext/15259003-fb04-4547-b4a9-b4511893c0fd/crypt/zone" + }, + { + "zone": { + "id": "5825447e-1b5b-4960-b202-e75853d3d250", + "underlay_address": "fd00:1122:3344:11c::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::9]:32345", + "dataset": { + "pool_name": "oxp_04e94454-cbd4-4cee-ad69-42372bcbabd5" + } + } + }, + "root": "/pool/ext/542e0fb3-552c-4d3b-b853-da1f13b581a0/crypt/zone" + }, + { + "zone": { + "id": "b937d3f0-1352-47a2-b9d1-a9ccf9c82b16", + "underlay_address": "fd00:1122:3344:11c::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::c]:32345", + "dataset": { + "pool_name": "oxp_542e0fb3-552c-4d3b-b853-da1f13b581a0" + } + } + }, + "root": "/pool/ext/eedd1d58-4892-456f-aaf7-9d650c7921ca/crypt/zone" + }, + { + "zone": { + "id": "d63a677b-8dac-44ee-89a2-cc4cb151254d", + "underlay_address": "fd00:1122:3344:11c::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::3]:32345", + "dataset": { + "pool_name": "oxp_45b5f1ee-7b66-4d74-8364-54fa0c73775f" + } + } + }, + "root": "/pool/ext/8a88768a-2dd5-43b7-bd40-0db77be4d3a8/crypt/zone" + }, + { + "zone": { + "id": "abcb92ea-9f17-4cd8-897b-9d0d1ef7903a", + "underlay_address": "fd00:1122:3344:11c::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::4]:32345", + "dataset": { + "pool_name": "oxp_341d49db-c06a-416d-90e1-b0a3426ed02e" + } + } + }, + "root": "/pool/ext/eedd1d58-4892-456f-aaf7-9d650c7921ca/crypt/zone" + }, + { + "zone": { + "id": "000ac89d-db07-47ae-83cf-d9cafef013de", + "underlay_address": "fd00:1122:3344:11c::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::b]:32345", + "dataset": { + "pool_name": "oxp_eedd1d58-4892-456f-aaf7-9d650c7921ca" + } + } + }, + "root": "/pool/ext/04e94454-cbd4-4cee-ad69-42372bcbabd5/crypt/zone" + }, + { + "zone": { + "id": "29e1e2e4-695e-4c05-8f0c-c16a0a61d390", + "underlay_address": "fd00:1122:3344:11c::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::7]:32345", + "dataset": { + "pool_name": "oxp_19d23d27-6a33-4203-b8c1-4b0df4ac791f" + } + } + }, + "root": "/pool/ext/d2a8ed82-22ef-46d8-ad40-e1cb2cecebee/crypt/zone" + }, + { + "zone": { + "id": "9fa7d7be-a6de-4d36-b56b-d1cc5ca7c82c", + "underlay_address": "fd00:1122:3344:11c::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11c::a]:32345", + "dataset": { + "pool_name": "oxp_0fd7a0b1-ed4b-4dc6-8c44-a49c9628c7e1" + } + } + }, + "root": "/pool/ext/d2a8ed82-22ef-46d8-ad40-e1cb2cecebee/crypt/zone" + }, + { + "zone": { + "id": "249db5f1-45e2-4a5c-a91f-cc51dbd87040", + "underlay_address": "fd00:1122:3344:11c::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:11c::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/542e0fb3-552c-4d3b-b853-da1f13b581a0/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled11.json 
b/sled-agent/tests/output/new-zones-ledgers/rack3-sled11.json new file mode 100644 index 0000000000..c94417ffb8 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled11.json @@ -0,0 +1,201 @@ +{ + "omicron_generation": 2, + "ledger_generation": 5, + "zones": [ + { + "zone": { + "id": "7ddd0738-59df-4b67-a41e-7f0de9827187", + "underlay_address": "fd00:1122:3344:11e::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::4]:32345", + "dataset": { + "pool_name": "oxp_09af632a-6b1b-4a18-8c91-d392da38b02f" + } + } + }, + "root": "/pool/ext/09af632a-6b1b-4a18-8c91-d392da38b02f/crypt/zone" + }, + { + "zone": { + "id": "9706189f-713a-4394-b5dc-45dcf67dc46e", + "underlay_address": "fd00:1122:3344:11e::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::9]:32345", + "dataset": { + "pool_name": "oxp_4e1837c8-91ab-4d1d-abfd-f5144d88535e" + } + } + }, + "root": "/pool/ext/2f0d47cb-28d1-4350-8656-60c6121f773b/crypt/zone" + }, + { + "zone": { + "id": "7bdd841b-5e34-4c19-9066-b12578651446", + "underlay_address": "fd00:1122:3344:11e::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::a]:32345", + "dataset": { + "pool_name": "oxp_78d1e7f7-8d11-4fed-8b1e-be58908aea2f" + } + } + }, + "root": "/pool/ext/62c23f4b-8e7b-4cd8-9055-19c1d8bd5ac8/crypt/zone" + }, + { + "zone": { + "id": "74c0f60b-de5f-4456-a85f-f992a6e10424", + "underlay_address": "fd00:1122:3344:11e::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::b]:32345", + "dataset": { + "pool_name": "oxp_3b81d709-bf10-4dd7-a2c0-759d8acc2da0" + } + } + }, + "root": "/pool/ext/09af632a-6b1b-4a18-8c91-d392da38b02f/crypt/zone" + }, + { + "zone": { + "id": "da81ce6f-bd38-440e-b966-8a743092fa21", + "underlay_address": "fd00:1122:3344:11e::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::6]:32345", + "dataset": { + "pool_name": "oxp_62c23f4b-8e7b-4cd8-9055-19c1d8bd5ac8" + } + } + }, + "root": "/pool/ext/215dd02b-0de6-488a-9e65-5e588cd079fb/crypt/zone" + }, + { + "zone": { + "id": "febbca37-5279-400f-a2e9-6b5271b2d2fc", + "underlay_address": "fd00:1122:3344:11e::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::7]:32345", + "dataset": { + "pool_name": "oxp_fb33e773-fb93-41a0-8078-b653b9078dda" + } + } + }, + "root": "/pool/ext/2f0d47cb-28d1-4350-8656-60c6121f773b/crypt/zone" + }, + { + "zone": { + "id": "5100e222-5ea4-4e67-9040-679137e666c8", + "underlay_address": "fd00:1122:3344:11e::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::5]:32345", + "dataset": { + "pool_name": "oxp_23767587-2253-431b-8944-18b9bfefcb3d" + } + } + }, + "root": "/pool/ext/3b81d709-bf10-4dd7-a2c0-759d8acc2da0/crypt/zone" + }, + { + "zone": { + "id": "c7ec3bc8-08ca-4901-a45e-0d68db72c6a7", + "underlay_address": "fd00:1122:3344:11e::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::3]:32345", + "dataset": { + "pool_name": "oxp_2f0d47cb-28d1-4350-8656-60c6121f773b" + } + } + }, + "root": "/pool/ext/215dd02b-0de6-488a-9e65-5e588cd079fb/crypt/zone" + }, + { + "zone": { + "id": "1fc80dd3-0fd9-4403-96bd-5bbf9eb0f15a", + "underlay_address": "fd00:1122:3344:11e::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::c]:32345", + "dataset": { + "pool_name": "oxp_2c932d54-41fb-4ffe-a57f-0479b9e5841e" + } + } + }, + "root": "/pool/ext/3b81d709-bf10-4dd7-a2c0-759d8acc2da0/crypt/zone" + }, + { + "zone": { + "id": 
"4eacc68d-5699-440a-ab33-c75f259e4cc3", + "underlay_address": "fd00:1122:3344:11e::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11e::8]:32345", + "dataset": { + "pool_name": "oxp_215dd02b-0de6-488a-9e65-5e588cd079fb" + } + } + }, + "root": "/pool/ext/4e1837c8-91ab-4d1d-abfd-f5144d88535e/crypt/zone" + }, + { + "zone": { + "id": "cb901d3e-8811-4c4c-a274-a44130501ecf", + "underlay_address": "fd00:1122:3344:11e::d", + "zone_type": { + "type": "boundary_ntp", + "address": "[fd00:1122:3344:11e::d]:123", + "ntp_servers": [ + "time.cloudflare.com" + ], + "dns_servers": [ + "1.1.1.1", + "8.8.8.8" + ], + "domain": null, + "nic": { + "id": "bcf9d9eb-b4ba-4fd5-91e0-55a3414ae049", + "kind": { + "type": "service", + "id": "cb901d3e-8811-4c4c-a274-a44130501ecf" + }, + "name": "ntp-cb901d3e-8811-4c4c-a274-a44130501ecf", + "ip": "172.30.3.6", + "mac": "A8:40:25:FF:D5:2F", + "subnet": "172.30.3.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "snat_cfg": { + "ip": "45.154.216.39", + "first_port": 16384, + "last_port": 32767 + } + } + }, + "root": "/pool/ext/23767587-2253-431b-8944-18b9bfefcb3d/crypt/zone" + }, + { + "zone": { + "id": "be4aada9-d160-401d-a630-a0764c039702", + "underlay_address": "fd00:1122:3344:2::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_2f0d47cb-28d1-4350-8656-60c6121f773b" + }, + "http_address": "[fd00:1122:3344:2::1]:5353", + "dns_address": "[fd00:1122:3344:2::1]:53", + "gz_address": "fd00:1122:3344:2::2", + "gz_address_index": 1 + } + }, + "root": "/pool/ext/78d1e7f7-8d11-4fed-8b1e-be58908aea2f/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled12.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled12.json new file mode 100644 index 0000000000..bfc30cf160 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled12.json @@ -0,0 +1,181 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "d8f1b9d2-fa2e-4f03-bbea-2039448d7792", + "underlay_address": "fd00:1122:3344:112::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::5]:32345", + "dataset": { + "pool_name": "oxp_7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1" + } + } + }, + "root": "/pool/ext/78d9f0ae-8e7f-450e-abc2-76b983efa5cd/crypt/zone" + }, + { + "zone": { + "id": "2074a935-c0b3-4c4f-aae5-a29adae3e1ac", + "underlay_address": "fd00:1122:3344:112::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::8]:32345", + "dataset": { + "pool_name": "oxp_ac663368-45fb-447c-811e-561c68e37bdd" + } + } + }, + "root": "/pool/ext/ac663368-45fb-447c-811e-561c68e37bdd/crypt/zone" + }, + { + "zone": { + "id": "2885d3c7-ad7d-445c-8630-dc6c81f8caa0", + "underlay_address": "fd00:1122:3344:112::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::a]:32345", + "dataset": { + "pool_name": "oxp_8e82e8da-e1c5-4867-bc1c-b5441f9c1010" + } + } + }, + "root": "/pool/ext/8e82e8da-e1c5-4867-bc1c-b5441f9c1010/crypt/zone" + }, + { + "zone": { + "id": "1eca241b-6868-4c59-876b-58356654f3b5", + "underlay_address": "fd00:1122:3344:112::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::c]:32345", + "dataset": { + "pool_name": "oxp_fde16c69-aa47-4a15-bb3f-3a5861ae45bd" + } + } + }, + "root": "/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone" + }, + { + "zone": { + "id": "cc656f2e-8542-4986-8524-2f55984939c1", + "underlay_address": "fd00:1122:3344:112::d", + 
"zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::d]:32345", + "dataset": { + "pool_name": "oxp_21e6d0f9-887e-4d6f-9a00-4cd61139eea6" + } + } + }, + "root": "/pool/ext/21e6d0f9-887e-4d6f-9a00-4cd61139eea6/crypt/zone" + }, + { + "zone": { + "id": "dfb1ebce-a4c7-4b50-9435-9a79b884c1af", + "underlay_address": "fd00:1122:3344:112::3", + "zone_type": { + "type": "clickhouse", + "address": "[fd00:1122:3344:112::3]:8123", + "dataset": { + "pool_name": "oxp_4f045315-de51-46ed-a011-16496615278f" + } + } + }, + "root": "/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone" + }, + { + "zone": { + "id": "a95d90ed-b2b1-4a5d-8d0d-4195b34bc764", + "underlay_address": "fd00:1122:3344:112::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::6]:32345", + "dataset": { + "pool_name": "oxp_d2c77c69-14d7-442e-8b47-a0d7af5a0e7e" + } + } + }, + "root": "/pool/ext/fad56ff1-ad9f-4215-b584-522eab18cf7b/crypt/zone" + }, + { + "zone": { + "id": "1d3ebc90-d5a5-4cb0-ae90-50bb2163ae13", + "underlay_address": "fd00:1122:3344:112::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::b]:32345", + "dataset": { + "pool_name": "oxp_fad56ff1-ad9f-4215-b584-522eab18cf7b" + } + } + }, + "root": "/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone" + }, + { + "zone": { + "id": "7af9f38b-0c7a-402e-8db3-7c7fb50b4665", + "underlay_address": "fd00:1122:3344:112::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::9]:32345", + "dataset": { + "pool_name": "oxp_d0693580-5c5a-449f-803f-ce7188ebc580" + } + } + }, + "root": "/pool/ext/d2c77c69-14d7-442e-8b47-a0d7af5a0e7e/crypt/zone" + }, + { + "zone": { + "id": "94d9bb0a-ecd2-4501-b960-60982f55ad12", + "underlay_address": "fd00:1122:3344:112::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::7]:32345", + "dataset": { + "pool_name": "oxp_78d9f0ae-8e7f-450e-abc2-76b983efa5cd" + } + } + }, + "root": "/pool/ext/ac663368-45fb-447c-811e-561c68e37bdd/crypt/zone" + }, + { + "zone": { + "id": "277c1105-576e-4ec1-8e2c-cbae2f5ac9f6", + "underlay_address": "fd00:1122:3344:112::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:112::4]:32345", + "dataset": { + "pool_name": "oxp_4f045315-de51-46ed-a011-16496615278f" + } + } + }, + "root": "/pool/ext/7d7ed1b7-7b77-4f0a-abb1-27de7cb584d1/crypt/zone" + }, + { + "zone": { + "id": "555c3407-a76c-4ea4-a17a-a670d85a59b0", + "underlay_address": "fd00:1122:3344:112::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:112::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/8e82e8da-e1c5-4867-bc1c-b5441f9c1010/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled13.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled13.json new file mode 100644 index 0000000000..66c04be148 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled13.json @@ -0,0 +1,201 @@ +{ + "omicron_generation": 2, + "ledger_generation": 5, + "zones": [ + { + "zone": { + "id": "fbcf51c9-a732-4a03-8c19-cfb5b819cb7a", + "underlay_address": "fd00:1122:3344:104::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::5]:32345", + "dataset": 
{ + "pool_name": "oxp_382a2961-cd27-4a9c-901d-468a45ff5708" + } + } + }, + "root": "/pool/ext/e99994ae-61ca-4742-a02c-eb0a8a5b69ff/crypt/zone" + }, + { + "zone": { + "id": "7f8a5026-1f1d-4ab3-8c04-077bfda2f815", + "underlay_address": "fd00:1122:3344:104::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::4]:32345", + "dataset": { + "pool_name": "oxp_9c99b9b6-8018-455e-a58a-c048ddd3e11b" + } + } + }, + "root": "/pool/ext/22c79e54-37ef-4ad2-a6cb-a7ee3e4f7167/crypt/zone" + }, + { + "zone": { + "id": "6d45d856-0e49-4eb7-ad76-989a9ae636a2", + "underlay_address": "fd00:1122:3344:104::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::3]:32345", + "dataset": { + "pool_name": "oxp_b74a84fa-b4c8-4c5f-92f4-f4e62a0a311d" + } + } + }, + "root": "/pool/ext/9c99b9b6-8018-455e-a58a-c048ddd3e11b/crypt/zone" + }, + { + "zone": { + "id": "c8dc7fff-72c8-49eb-a552-d605f8655134", + "underlay_address": "fd00:1122:3344:104::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::6]:32345", + "dataset": { + "pool_name": "oxp_22c79e54-37ef-4ad2-a6cb-a7ee3e4f7167" + } + } + }, + "root": "/pool/ext/22c79e54-37ef-4ad2-a6cb-a7ee3e4f7167/crypt/zone" + }, + { + "zone": { + "id": "128a90f5-8889-4665-8343-2c7098f2922c", + "underlay_address": "fd00:1122:3344:104::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::7]:32345", + "dataset": { + "pool_name": "oxp_8b3d0b51-c6a5-4d2c-827a-0d0d1471136d" + } + } + }, + "root": "/pool/ext/29cd042b-e772-4d26-ac85-ef16009950bd/crypt/zone" + }, + { + "zone": { + "id": "a72f1878-3b03-4267-9024-5df5ebae69de", + "underlay_address": "fd00:1122:3344:104::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::a]:32345", + "dataset": { + "pool_name": "oxp_e99994ae-61ca-4742-a02c-eb0a8a5b69ff" + } + } + }, + "root": "/pool/ext/8b3d0b51-c6a5-4d2c-827a-0d0d1471136d/crypt/zone" + }, + { + "zone": { + "id": "6a9165a2-9b66-485a-aaf0-70d89d60bb6c", + "underlay_address": "fd00:1122:3344:104::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::b]:32345", + "dataset": { + "pool_name": "oxp_6a02f05f-e400-4c80-8df8-89aaecb6c12b" + } + } + }, + "root": "/pool/ext/9c99b9b6-8018-455e-a58a-c048ddd3e11b/crypt/zone" + }, + { + "zone": { + "id": "9677c4ed-96bc-4dcb-ae74-f7a3e9d2b5e2", + "underlay_address": "fd00:1122:3344:104::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::c]:32345", + "dataset": { + "pool_name": "oxp_7c30978f-ee87-4e53-8fdf-3455e5e851b7" + } + } + }, + "root": "/pool/ext/29cd042b-e772-4d26-ac85-ef16009950bd/crypt/zone" + }, + { + "zone": { + "id": "179039e7-3ffd-4b76-9379-bef41d42a5ff", + "underlay_address": "fd00:1122:3344:104::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::8]:32345", + "dataset": { + "pool_name": "oxp_4db7e002-e112-4bfc-a41e-8ae26991b01e" + } + } + }, + "root": "/pool/ext/8b3d0b51-c6a5-4d2c-827a-0d0d1471136d/crypt/zone" + }, + { + "zone": { + "id": "6067e31e-b6a3-4114-9e49-0296adc8e7af", + "underlay_address": "fd00:1122:3344:104::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::9]:32345", + "dataset": { + "pool_name": "oxp_29cd042b-e772-4d26-ac85-ef16009950bd" + } + } + }, + "root": "/pool/ext/9c99b9b6-8018-455e-a58a-c048ddd3e11b/crypt/zone" + }, + { + "zone": { + "id": "440dd615-e11f-4a5d-aeb4-dcf88bb314de", + "underlay_address": "fd00:1122:3344:104::d", + "zone_type": { + "type": "boundary_ntp", + "address": 
"[fd00:1122:3344:104::d]:123", + "ntp_servers": [ + "time.cloudflare.com" + ], + "dns_servers": [ + "1.1.1.1", + "8.8.8.8" + ], + "domain": null, + "nic": { + "id": "0b52fe1b-f4cc-43b1-9ac3-4ebb4ab60133", + "kind": { + "type": "service", + "id": "440dd615-e11f-4a5d-aeb4-dcf88bb314de" + }, + "name": "ntp-440dd615-e11f-4a5d-aeb4-dcf88bb314de", + "ip": "172.30.3.5", + "mac": "A8:40:25:FF:85:1E", + "subnet": "172.30.3.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "snat_cfg": { + "ip": "45.154.216.38", + "first_port": 0, + "last_port": 16383 + } + } + }, + "root": "/pool/ext/382a2961-cd27-4a9c-901d-468a45ff5708/crypt/zone" + }, + { + "zone": { + "id": "06e2de03-bd92-404c-a8ea-a13185539d24", + "underlay_address": "fd00:1122:3344:1::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_b74a84fa-b4c8-4c5f-92f4-f4e62a0a311d" + }, + "http_address": "[fd00:1122:3344:1::1]:5353", + "dns_address": "[fd00:1122:3344:1::1]:53", + "gz_address": "fd00:1122:3344:1::2", + "gz_address_index": 0 + } + }, + "root": "/pool/ext/e99994ae-61ca-4742-a02c-eb0a8a5b69ff/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled14.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled14.json new file mode 100644 index 0000000000..e8d061dbfd --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled14.json @@ -0,0 +1,198 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "ac35afab-a312-43c3-a42d-04b8e99fcbde", + "underlay_address": "fd00:1122:3344:111::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::4]:32345", + "dataset": { + "pool_name": "oxp_6601065c-c172-4118-81b4-16adde7e9401" + } + } + }, + "root": "/pool/ext/24d7e250-9fc6-459e-8155-30f8e8ccb28c/crypt/zone" + }, + { + "zone": { + "id": "6cd94da2-35b9-4683-a931-29ad4a5ed0ef", + "underlay_address": "fd00:1122:3344:111::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::c]:32345", + "dataset": { + "pool_name": "oxp_58276eba-a53c-4ef3-b374-4cdcde4d6e12" + } + } + }, + "root": "/pool/ext/24d7e250-9fc6-459e-8155-30f8e8ccb28c/crypt/zone" + }, + { + "zone": { + "id": "41f07d39-fcc0-4796-8b7c-7cfcd9135f78", + "underlay_address": "fd00:1122:3344:111::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::9]:32345", + "dataset": { + "pool_name": "oxp_4b90abdc-3348-4158-bedc-5bcd56e281d8" + } + } + }, + "root": "/pool/ext/8e955f54-fbef-4021-9eec-457825468813/crypt/zone" + }, + { + "zone": { + "id": "44c35566-dd64-4e4a-896e-c50aaa3df14f", + "underlay_address": "fd00:1122:3344:111::3", + "zone_type": { + "type": "nexus", + "internal_address": "[fd00:1122:3344:111::3]:12221", + "external_ip": "45.154.216.37", + "nic": { + "id": "6f824d20-6ce0-4e8b-9ce3-b12dd2b59913", + "kind": { + "type": "service", + "id": "44c35566-dd64-4e4a-896e-c50aaa3df14f" + }, + "name": "nexus-44c35566-dd64-4e4a-896e-c50aaa3df14f", + "ip": "172.30.2.7", + "mac": "A8:40:25:FF:E8:5F", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "8.8.8.8" + ] + } + }, + "root": "/pool/ext/435d7a1b-2865-4d49-903f-a68f464ade4d/crypt/zone" + }, + { + "zone": { + "id": "e5020d24-8652-456b-bf92-cd7d255a34c5", + "underlay_address": "fd00:1122:3344:111::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::6]:32345", + "dataset": { + "pool_name": 
"oxp_f6925045-363d-4e18-9bde-ee2987b33d21" + } + } + }, + "root": "/pool/ext/6601065c-c172-4118-81b4-16adde7e9401/crypt/zone" + }, + { + "zone": { + "id": "8f25f258-afd7-4351-83e4-24220ec0c251", + "underlay_address": "fd00:1122:3344:111::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::8]:32345", + "dataset": { + "pool_name": "oxp_8e955f54-fbef-4021-9eec-457825468813" + } + } + }, + "root": "/pool/ext/6601065c-c172-4118-81b4-16adde7e9401/crypt/zone" + }, + { + "zone": { + "id": "26aa50ec-d70a-47ea-85fc-e55c62a2e0c6", + "underlay_address": "fd00:1122:3344:111::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::5]:32345", + "dataset": { + "pool_name": "oxp_24d7e250-9fc6-459e-8155-30f8e8ccb28c" + } + } + }, + "root": "/pool/ext/435d7a1b-2865-4d49-903f-a68f464ade4d/crypt/zone" + }, + { + "zone": { + "id": "68dc212f-a96a-420f-8334-b11ee5d7cb95", + "underlay_address": "fd00:1122:3344:111::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::7]:32345", + "dataset": { + "pool_name": "oxp_4353b00b-937e-4d07-aea6-014c57b6f12c" + } + } + }, + "root": "/pool/ext/24d7e250-9fc6-459e-8155-30f8e8ccb28c/crypt/zone" + }, + { + "zone": { + "id": "475140fa-a5dc-4ec1-876d-751c48adfc37", + "underlay_address": "fd00:1122:3344:111::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::a]:32345", + "dataset": { + "pool_name": "oxp_ee55b053-6874-4e20-86b5-2e105e64c068" + } + } + }, + "root": "/pool/ext/ee55b053-6874-4e20-86b5-2e105e64c068/crypt/zone" + }, + { + "zone": { + "id": "09d5a8c9-00db-4914-a2c6-7ae3d2da4558", + "underlay_address": "fd00:1122:3344:111::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::d]:32345", + "dataset": { + "pool_name": "oxp_9ab5aba5-47dc-4bc4-8f6d-7cbe0f98a9a2" + } + } + }, + "root": "/pool/ext/8e955f54-fbef-4021-9eec-457825468813/crypt/zone" + }, + { + "zone": { + "id": "014f6a39-ad64-4f0a-9fef-01ca0d184cbf", + "underlay_address": "fd00:1122:3344:111::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:111::b]:32345", + "dataset": { + "pool_name": "oxp_435d7a1b-2865-4d49-903f-a68f464ade4d" + } + } + }, + "root": "/pool/ext/f6925045-363d-4e18-9bde-ee2987b33d21/crypt/zone" + }, + { + "zone": { + "id": "aceaf348-ba07-4965-a543-63a800826fe8", + "underlay_address": "fd00:1122:3344:111::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:111::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/8e955f54-fbef-4021-9eec-457825468813/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled15.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled15.json new file mode 100644 index 0000000000..e3b3dba86a --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled15.json @@ -0,0 +1,196 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "09a9ecee-1e7c-4819-b27a-73bb61099ce7", + "underlay_address": "fd00:1122:3344:114::3", + "zone_type": { + "type": "external_dns", + "dataset": { + "pool_name": "oxp_b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e" + }, + "http_address": "[fd00:1122:3344:114::3]:5353", + "dns_address": "45.154.216.33:53", + 
"nic": { + "id": "400ca77b-7fee-47d5-8f17-1f4b9c729f27", + "kind": { + "type": "service", + "id": "09a9ecee-1e7c-4819-b27a-73bb61099ce7" + }, + "name": "external-dns-09a9ecee-1e7c-4819-b27a-73bb61099ce7", + "ip": "172.30.1.5", + "mac": "A8:40:25:FF:B7:C7", + "subnet": "172.30.1.0/24", + "vni": 100, + "primary": true, + "slot": 0 + } + } + }, + "root": "/pool/ext/9e878b1e-bf92-4155-8162-640851c2f5d5/crypt/zone" + }, + { + "zone": { + "id": "1792e003-55f7-49b8-906c-4160db91bc23", + "underlay_address": "fd00:1122:3344:114::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::5]:32345", + "dataset": { + "pool_name": "oxp_7f3a760f-a4c0-456f-8a22-2d06ecac1022" + } + } + }, + "root": "/pool/ext/76f09ad5-c96c-4748-bbe4-71afaea7bc5e/crypt/zone" + }, + { + "zone": { + "id": "73bc7c0e-1034-449f-8920-4a1f418653ff", + "underlay_address": "fd00:1122:3344:114::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::8]:32345", + "dataset": { + "pool_name": "oxp_e87037be-1cdf-4c6e-a8a3-c27b830eaef9" + } + } + }, + "root": "/pool/ext/b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e/crypt/zone" + }, + { + "zone": { + "id": "06dc6619-6251-4543-9a10-da1698af49d5", + "underlay_address": "fd00:1122:3344:114::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::9]:32345", + "dataset": { + "pool_name": "oxp_ee34c530-ce70-4f1a-8c97-d0ebb77ccfc8" + } + } + }, + "root": "/pool/ext/9e878b1e-bf92-4155-8162-640851c2f5d5/crypt/zone" + }, + { + "zone": { + "id": "0d796c52-37ca-490d-b42f-dcc22fe5fd6b", + "underlay_address": "fd00:1122:3344:114::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::c]:32345", + "dataset": { + "pool_name": "oxp_9ec2b893-d486-4b24-a077-1a297f9eb15f" + } + } + }, + "root": "/pool/ext/9e72c0e2-4895-4791-b606-2f18e432fb69/crypt/zone" + }, + { + "zone": { + "id": "91d0011f-de44-4823-bc26-a447affa39bc", + "underlay_address": "fd00:1122:3344:114::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::a]:32345", + "dataset": { + "pool_name": "oxp_85e81a14-031d-4a63-a91f-981c64e91f60" + } + } + }, + "root": "/pool/ext/b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e/crypt/zone" + }, + { + "zone": { + "id": "0c44a2f1-559a-459c-9931-e0e7964d41c6", + "underlay_address": "fd00:1122:3344:114::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::b]:32345", + "dataset": { + "pool_name": "oxp_76f09ad5-c96c-4748-bbe4-71afaea7bc5e" + } + } + }, + "root": "/pool/ext/e87037be-1cdf-4c6e-a8a3-c27b830eaef9/crypt/zone" + }, + { + "zone": { + "id": "ea363819-96f6-4fb6-a203-f18414f1c60e", + "underlay_address": "fd00:1122:3344:114::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::4]:32345", + "dataset": { + "pool_name": "oxp_b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e" + } + } + }, + "root": "/pool/ext/b7fbb6db-aa4a-4a6d-8206-b7bdc000d56e/crypt/zone" + }, + { + "zone": { + "id": "21592c39-da6b-4527-842e-edeeceffafa1", + "underlay_address": "fd00:1122:3344:114::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::6]:32345", + "dataset": { + "pool_name": "oxp_9e72c0e2-4895-4791-b606-2f18e432fb69" + } + } + }, + "root": "/pool/ext/7aff8429-b65d-4a53-a796-7221ac7581a9/crypt/zone" + }, + { + "zone": { + "id": "f33b1263-f1b2-43a6-a8aa-5f8570dd4e72", + "underlay_address": "fd00:1122:3344:114::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::7]:32345", + "dataset": { + "pool_name": "oxp_9e878b1e-bf92-4155-8162-640851c2f5d5" + } + } + 
}, + "root": "/pool/ext/7f3a760f-a4c0-456f-8a22-2d06ecac1022/crypt/zone" + }, + { + "zone": { + "id": "6f42b469-5a36-4048-a152-e884f7e8a206", + "underlay_address": "fd00:1122:3344:114::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:114::d]:32345", + "dataset": { + "pool_name": "oxp_7aff8429-b65d-4a53-a796-7221ac7581a9" + } + } + }, + "root": "/pool/ext/9e72c0e2-4895-4791-b606-2f18e432fb69/crypt/zone" + }, + { + "zone": { + "id": "ad77d594-8f78-4d33-a5e4-59887060178e", + "underlay_address": "fd00:1122:3344:114::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:114::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/85e81a14-031d-4a63-a91f-981c64e91f60/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled16.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled16.json new file mode 100644 index 0000000000..3cd727e1bc --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled16.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "dcb9a4ae-2c89-4a74-905b-b7936ff49c19", + "underlay_address": "fd00:1122:3344:11f::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::9]:32345", + "dataset": { + "pool_name": "oxp_af509039-d27f-4095-bc9d-cecbc5c606db" + } + } + }, + "root": "/pool/ext/44ee0fb4-6034-44e8-b3de-b3a44457ffca/crypt/zone" + }, + { + "zone": { + "id": "dbd46f71-ec39-4b72-a77d-9d281ccb37e0", + "underlay_address": "fd00:1122:3344:11f::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::b]:32345", + "dataset": { + "pool_name": "oxp_44ee0fb4-6034-44e8-b3de-b3a44457ffca" + } + } + }, + "root": "/pool/ext/5e32c0a3-1210-402b-91fb-256946eeac2b/crypt/zone" + }, + { + "zone": { + "id": "a1f30569-a5c6-4a6d-922e-241966aea142", + "underlay_address": "fd00:1122:3344:11f::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::6]:32345", + "dataset": { + "pool_name": "oxp_d2133e8b-51cc-455e-89d0-5454fd4fe109" + } + } + }, + "root": "/pool/ext/3f57835b-1469-499a-8757-7cc56acc5d49/crypt/zone" + }, + { + "zone": { + "id": "a33e25ae-4e41-40f4-843d-3d12f62d8cb6", + "underlay_address": "fd00:1122:3344:11f::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::8]:32345", + "dataset": { + "pool_name": "oxp_c8e4a7f4-1ae6-4683-8397-ea53475a53e8" + } + } + }, + "root": "/pool/ext/5e32c0a3-1210-402b-91fb-256946eeac2b/crypt/zone" + }, + { + "zone": { + "id": "65ed75c2-2d80-4de5-a6f6-adfa6516c7cf", + "underlay_address": "fd00:1122:3344:11f::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::c]:32345", + "dataset": { + "pool_name": "oxp_3f57835b-1469-499a-8757-7cc56acc5d49" + } + } + }, + "root": "/pool/ext/cd8cd75c-632b-4527-889a-7ca0c080fe2c/crypt/zone" + }, + { + "zone": { + "id": "bc6ccf18-6b9b-4687-8b70-c7917d972ae0", + "underlay_address": "fd00:1122:3344:11f::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::a]:32345", + "dataset": { + "pool_name": "oxp_cd8cd75c-632b-4527-889a-7ca0c080fe2c" + } + } + }, + "root": "/pool/ext/5e32c0a3-1210-402b-91fb-256946eeac2b/crypt/zone" + }, + { + "zone": { + 
"id": "06233bfe-a857-4819-aefe-212af9eeb90f", + "underlay_address": "fd00:1122:3344:11f::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::5]:32345", + "dataset": { + "pool_name": "oxp_c8a1aaf1-d27c-45fd-9f8d-80ac6bf6865d" + } + } + }, + "root": "/pool/ext/af509039-d27f-4095-bc9d-cecbc5c606db/crypt/zone" + }, + { + "zone": { + "id": "0bbfef71-9eae-43b6-b5e7-0060ce9269dd", + "underlay_address": "fd00:1122:3344:11f::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::4]:32345", + "dataset": { + "pool_name": "oxp_5e32c0a3-1210-402b-91fb-256946eeac2b" + } + } + }, + "root": "/pool/ext/af509039-d27f-4095-bc9d-cecbc5c606db/crypt/zone" + }, + { + "zone": { + "id": "550e10ee-24d1-444f-80be-2744dd321e0f", + "underlay_address": "fd00:1122:3344:11f::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11f::7]:32345", + "dataset": { + "pool_name": "oxp_f437ce0e-eb45-4be8-b1fe-33ed2656eb01" + } + } + }, + "root": "/pool/ext/44ee0fb4-6034-44e8-b3de-b3a44457ffca/crypt/zone" + }, + { + "zone": { + "id": "86d768f3-ece2-4956-983f-999bdb23a983", + "underlay_address": "fd00:1122:3344:11f::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:11f::3]:32221", + "dataset": { + "pool_name": "oxp_5e32c0a3-1210-402b-91fb-256946eeac2b" + } + } + }, + "root": "/pool/ext/c8a1aaf1-d27c-45fd-9f8d-80ac6bf6865d/crypt/zone" + }, + { + "zone": { + "id": "2f358812-f72c-4838-a5ea-7d78d0954be0", + "underlay_address": "fd00:1122:3344:11f::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:11f::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/f437ce0e-eb45-4be8-b1fe-33ed2656eb01/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled17.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled17.json new file mode 100644 index 0000000000..09981ecacc --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled17.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "525a19a2-d4ac-418d-bdcf-2ce26e7abe70", + "underlay_address": "fd00:1122:3344:107::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::a]:32345", + "dataset": { + "pool_name": "oxp_cb774d2f-ff86-4fd7-866b-17a6b10e61f0" + } + } + }, + "root": "/pool/ext/e17b68b5-f50c-4fc3-b55a-80d284c6c32d/crypt/zone" + }, + { + "zone": { + "id": "7af188e1-6175-4769-9e4f-2ca7a98b76f6", + "underlay_address": "fd00:1122:3344:107::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::4]:32345", + "dataset": { + "pool_name": "oxp_0cbbcf22-770d-4e75-9148-e6109b129093" + } + } + }, + "root": "/pool/ext/b998e8df-ea69-4bdd-84cb-b7f17075b060/crypt/zone" + }, + { + "zone": { + "id": "2544540f-6ffc-46c0-84bf-f42a110c02d7", + "underlay_address": "fd00:1122:3344:107::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::6]:32345", + "dataset": { + "pool_name": "oxp_e17b68b5-f50c-4fc3-b55a-80d284c6c32d" + } + } + }, + "root": "/pool/ext/521fa477-4d83-49a8-a5cf-c267b7f0c409/crypt/zone" + }, + { + "zone": { + "id": "cfc20f72-cac2-4681-a6d8-e5a0accafbb7", + "underlay_address": "fd00:1122:3344:107::7", 
+ "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::7]:32345", + "dataset": { + "pool_name": "oxp_b998e8df-ea69-4bdd-84cb-b7f17075b060" + } + } + }, + "root": "/pool/ext/0cbbcf22-770d-4e75-9148-e6109b129093/crypt/zone" + }, + { + "zone": { + "id": "e24be791-5773-425e-a3df-e35ca81570c7", + "underlay_address": "fd00:1122:3344:107::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::9]:32345", + "dataset": { + "pool_name": "oxp_7849c221-dc7f-43ac-ac47-bc51864e083b" + } + } + }, + "root": "/pool/ext/7849c221-dc7f-43ac-ac47-bc51864e083b/crypt/zone" + }, + { + "zone": { + "id": "170856ee-21cf-4780-8903-175d558bc7cc", + "underlay_address": "fd00:1122:3344:107::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::3]:32345", + "dataset": { + "pool_name": "oxp_618e21e5-77d4-40ba-9f8e-7960e9ad92e2" + } + } + }, + "root": "/pool/ext/aa7a37fb-2f03-4d5c-916b-db3a4fc269ac/crypt/zone" + }, + { + "zone": { + "id": "604278ff-525a-4d41-82ff-07aef3174d38", + "underlay_address": "fd00:1122:3344:107::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::5]:32345", + "dataset": { + "pool_name": "oxp_521fa477-4d83-49a8-a5cf-c267b7f0c409" + } + } + }, + "root": "/pool/ext/0cbbcf22-770d-4e75-9148-e6109b129093/crypt/zone" + }, + { + "zone": { + "id": "d0d4fcc0-6ed0-410a-99c7-5daf34014421", + "underlay_address": "fd00:1122:3344:107::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::b]:32345", + "dataset": { + "pool_name": "oxp_aa7a37fb-2f03-4d5c-916b-db3a4fc269ac" + } + } + }, + "root": "/pool/ext/aa7a37fb-2f03-4d5c-916b-db3a4fc269ac/crypt/zone" + }, + { + "zone": { + "id": "c935df7b-2629-48ee-bc10-20508301905d", + "underlay_address": "fd00:1122:3344:107::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::c]:32345", + "dataset": { + "pool_name": "oxp_793fd018-5fdc-4e54-9c45-f8023fa3ea18" + } + } + }, + "root": "/pool/ext/7849c221-dc7f-43ac-ac47-bc51864e083b/crypt/zone" + }, + { + "zone": { + "id": "4ba5f3b6-8be5-4a85-bc57-a5e3b0b867d8", + "underlay_address": "fd00:1122:3344:107::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:107::8]:32345", + "dataset": { + "pool_name": "oxp_e80e7996-c572-481e-8c22-61c16c6e47f4" + } + } + }, + "root": "/pool/ext/e17b68b5-f50c-4fc3-b55a-80d284c6c32d/crypt/zone" + }, + { + "zone": { + "id": "395c9d6e-3bd0-445e-9269-46c3260edb83", + "underlay_address": "fd00:1122:3344:107::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:107::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/0cbbcf22-770d-4e75-9148-e6109b129093/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled18.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled18.json new file mode 100644 index 0000000000..708019883e --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled18.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "c7096dd4-e429-4a6f-9725-041a77ef2513", + "underlay_address": "fd00:1122:3344:11a::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::6]:32345", + "dataset": 
{ + "pool_name": "oxp_dcf62af6-c0f9-4eb5-9b23-9424ef8f3d32" + } + } + }, + "root": "/pool/ext/b869e463-c8b9-4c12-a6b9-13175b3896dd/crypt/zone" + }, + { + "zone": { + "id": "09dd367f-b32f-43f3-aa53-11ccec1cd0c9", + "underlay_address": "fd00:1122:3344:11a::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::9]:32345", + "dataset": { + "pool_name": "oxp_d7d00317-42c7-4d1e-a04c-85491fb230cd" + } + } + }, + "root": "/pool/ext/d7d00317-42c7-4d1e-a04c-85491fb230cd/crypt/zone" + }, + { + "zone": { + "id": "fb2f85f1-05b3-432f-9bb5-63fb27a762b1", + "underlay_address": "fd00:1122:3344:11a::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::5]:32345", + "dataset": { + "pool_name": "oxp_db4a9949-68da-4c1c-9a1c-49083eba14fe" + } + } + }, + "root": "/pool/ext/db4a9949-68da-4c1c-9a1c-49083eba14fe/crypt/zone" + }, + { + "zone": { + "id": "5b89425e-69e4-4305-8f33-dc5768a1849e", + "underlay_address": "fd00:1122:3344:11a::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::a]:32345", + "dataset": { + "pool_name": "oxp_64a1bad7-d1b1-4e39-a3f3-9b8d73c4709e" + } + } + }, + "root": "/pool/ext/64a1bad7-d1b1-4e39-a3f3-9b8d73c4709e/crypt/zone" + }, + { + "zone": { + "id": "a5156db4-273a-4f8b-b8d8-df77062a6c63", + "underlay_address": "fd00:1122:3344:11a::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::4]:32345", + "dataset": { + "pool_name": "oxp_b869e463-c8b9-4c12-a6b9-13175b3896dd" + } + } + }, + "root": "/pool/ext/dcf62af6-c0f9-4eb5-9b23-9424ef8f3d32/crypt/zone" + }, + { + "zone": { + "id": "1f2d2f86-b69b-4130-bb9b-e62ba0cb6802", + "underlay_address": "fd00:1122:3344:11a::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::b]:32345", + "dataset": { + "pool_name": "oxp_153ffee4-5d7a-4786-ad33-d5567b434fe0" + } + } + }, + "root": "/pool/ext/174a067d-1c5a-49f7-a29f-1e62ab1c3796/crypt/zone" + }, + { + "zone": { + "id": "1e249cc9-52e7-4d66-b713-8ace1392e991", + "underlay_address": "fd00:1122:3344:11a::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::7]:32345", + "dataset": { + "pool_name": "oxp_04b6215e-9651-4a3c-ba1b-b8a1e67b3d89" + } + } + }, + "root": "/pool/ext/db4a9949-68da-4c1c-9a1c-49083eba14fe/crypt/zone" + }, + { + "zone": { + "id": "eb779538-2b1b-4d1d-8c7e-b15f04db6e53", + "underlay_address": "fd00:1122:3344:11a::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::3]:32345", + "dataset": { + "pool_name": "oxp_aacb8524-3562-4f97-a616-9023230d6efa" + } + } + }, + "root": "/pool/ext/174a067d-1c5a-49f7-a29f-1e62ab1c3796/crypt/zone" + }, + { + "zone": { + "id": "b575d52d-be7d-46af-814b-91e6d18f3464", + "underlay_address": "fd00:1122:3344:11a::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::8]:32345", + "dataset": { + "pool_name": "oxp_174a067d-1c5a-49f7-a29f-1e62ab1c3796" + } + } + }, + "root": "/pool/ext/64a1bad7-d1b1-4e39-a3f3-9b8d73c4709e/crypt/zone" + }, + { + "zone": { + "id": "274200bc-eac7-47d7-8a57-4b7be794caba", + "underlay_address": "fd00:1122:3344:11a::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11a::c]:32345", + "dataset": { + "pool_name": "oxp_2e7644e4-7d46-42bf-8e7a-9c3f39085b3f" + } + } + }, + "root": "/pool/ext/2e7644e4-7d46-42bf-8e7a-9c3f39085b3f/crypt/zone" + }, + { + "zone": { + "id": "bc20ba3a-df62-4a62-97c2-75b5653f84b4", + "underlay_address": "fd00:1122:3344:11a::d", + "zone_type": { + "type": "internal_ntp", + "address": 
"[fd00:1122:3344:11a::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/04b6215e-9651-4a3c-ba1b-b8a1e67b3d89/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled19.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled19.json new file mode 100644 index 0000000000..197df304e3 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled19.json @@ -0,0 +1,181 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "9c73abb9-edb8-4aa2-835b-c25ebe4466d9", + "underlay_address": "fd00:1122:3344:109::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::7]:32345", + "dataset": { + "pool_name": "oxp_b7a3032f-7b8c-4a6a-9fa2-e5773bfdbc94" + } + } + }, + "root": "/pool/ext/46d21f3d-23be-4361-b5c5-9d0f6ece5b8c/crypt/zone" + }, + { + "zone": { + "id": "ca576bda-cbdd-4bb9-9d75-ce06d569e926", + "underlay_address": "fd00:1122:3344:109::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::a]:32345", + "dataset": { + "pool_name": "oxp_863c4bc4-9c7e-453c-99d8-a3d509f49f3e" + } + } + }, + "root": "/pool/ext/7e67cb32-0c00-4090-9647-eb7bae75deeb/crypt/zone" + }, + { + "zone": { + "id": "f010978d-346e-49cd-b265-7607a25685f9", + "underlay_address": "fd00:1122:3344:109::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::c]:32345", + "dataset": { + "pool_name": "oxp_9bc1dab8-2d2a-4f92-bdfb-94ebca7881f1" + } + } + }, + "root": "/pool/ext/9bc1dab8-2d2a-4f92-bdfb-94ebca7881f1/crypt/zone" + }, + { + "zone": { + "id": "daff4162-cc81-4586-a457-91d767b8f1d9", + "underlay_address": "fd00:1122:3344:109::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::6]:32345", + "dataset": { + "pool_name": "oxp_b9b5b50c-e823-41ae-9585-01b818883521" + } + } + }, + "root": "/pool/ext/de682b18-afaf-4d53-b62e-934f6bd4a1f8/crypt/zone" + }, + { + "zone": { + "id": "9f300d3d-e698-4cc8-be4c-1f81ac8c927f", + "underlay_address": "fd00:1122:3344:109::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::d]:32345", + "dataset": { + "pool_name": "oxp_f1d82c22-ad7d-4cda-9ab0-8f5f496d90ce" + } + } + }, + "root": "/pool/ext/de682b18-afaf-4d53-b62e-934f6bd4a1f8/crypt/zone" + }, + { + "zone": { + "id": "8db7c7be-da40-4a1c-9681-4d02606a7eb7", + "underlay_address": "fd00:1122:3344:109::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::9]:32345", + "dataset": { + "pool_name": "oxp_46d21f3d-23be-4361-b5c5-9d0f6ece5b8c" + } + } + }, + "root": "/pool/ext/b7a3032f-7b8c-4a6a-9fa2-e5773bfdbc94/crypt/zone" + }, + { + "zone": { + "id": "b990911b-805a-4f9d-bd83-e977f5b19a35", + "underlay_address": "fd00:1122:3344:109::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::4]:32345", + "dataset": { + "pool_name": "oxp_7e67cb32-0c00-4090-9647-eb7bae75deeb" + } + } + }, + "root": "/pool/ext/de682b18-afaf-4d53-b62e-934f6bd4a1f8/crypt/zone" + }, + { + "zone": { + "id": "c99392f5-8f30-41ac-9eeb-12d7f4b707f1", + "underlay_address": "fd00:1122:3344:109::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::b]:32345", + "dataset": { + "pool_name": 
"oxp_de682b18-afaf-4d53-b62e-934f6bd4a1f8" + } + } + }, + "root": "/pool/ext/46d21f3d-23be-4361-b5c5-9d0f6ece5b8c/crypt/zone" + }, + { + "zone": { + "id": "7f6cb339-9eb1-4866-8a4f-383bad25b36f", + "underlay_address": "fd00:1122:3344:109::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::5]:32345", + "dataset": { + "pool_name": "oxp_458cbfa3-3752-415d-8a3b-fb64e88468e1" + } + } + }, + "root": "/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone" + }, + { + "zone": { + "id": "11946372-f253-4648-b00c-c7874a7b2888", + "underlay_address": "fd00:1122:3344:109::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:109::8]:32345", + "dataset": { + "pool_name": "oxp_d73332f5-b2a5-46c0-94cf-c5c5712abfe8" + } + } + }, + "root": "/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone" + }, + { + "zone": { + "id": "58ece9e1-387f-4d2f-a42f-69cd34f9f380", + "underlay_address": "fd00:1122:3344:109::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:109::3]:32221", + "dataset": { + "pool_name": "oxp_7e67cb32-0c00-4090-9647-eb7bae75deeb" + } + } + }, + "root": "/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone" + }, + { + "zone": { + "id": "f016a25a-deb5-4f20-bdb0-2425c00d41a6", + "underlay_address": "fd00:1122:3344:109::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:109::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/b9b5b50c-e823-41ae-9585-01b818883521/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled2.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled2.json new file mode 100644 index 0000000000..ba6ab6f915 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled2.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "dd799dd4-03f9-451d-85e2-844155753a03", + "underlay_address": "fd00:1122:3344:10a::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::7]:32345", + "dataset": { + "pool_name": "oxp_7dcf3acc-bde9-4306-bb46-4c6a6cbbb7ba" + } + } + }, + "root": "/pool/ext/7dcf3acc-bde9-4306-bb46-4c6a6cbbb7ba/crypt/zone" + }, + { + "zone": { + "id": "dbf9346d-b46d-4402-bb44-92ce20fb5290", + "underlay_address": "fd00:1122:3344:10a::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::9]:32345", + "dataset": { + "pool_name": "oxp_9275d50f-da2c-4f84-9775-598a364309ad" + } + } + }, + "root": "/pool/ext/d83e36ef-dd7a-4cc2-be19-379b1114c031/crypt/zone" + }, + { + "zone": { + "id": "9a55ebdd-eeef-4954-b0a1-e32b04837f14", + "underlay_address": "fd00:1122:3344:10a::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::4]:32345", + "dataset": { + "pool_name": "oxp_7f30f77e-5998-4676-a226-b433b5940e77" + } + } + }, + "root": "/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone" + }, + { + "zone": { + "id": "bc2935f8-e4fa-4015-968e-f90985533a6a", + "underlay_address": "fd00:1122:3344:10a::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::6]:32345", + "dataset": { + "pool_name": "oxp_022c9d58-e91f-480d-bda6-0cf32ce3b1f5" + } + } + }, + "root": 
"/pool/ext/c395dcc3-6ece-4b3f-b143-e111a54ef7da/crypt/zone" + }, + { + "zone": { + "id": "63f8c861-fa1d-4121-92d9-7efa5ef7f5a0", + "underlay_address": "fd00:1122:3344:10a::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::a]:32345", + "dataset": { + "pool_name": "oxp_3c805784-f403-4d01-9eb0-4f77d0821980" + } + } + }, + "root": "/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone" + }, + { + "zone": { + "id": "4996dcf9-78de-4f69-94fa-c09cc86a8d3c", + "underlay_address": "fd00:1122:3344:10a::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::b]:32345", + "dataset": { + "pool_name": "oxp_f9fe9ce6-be0d-4974-bc30-78a8f1330496" + } + } + }, + "root": "/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone" + }, + { + "zone": { + "id": "36b9a4bf-7b30-4fe7-903d-3b722c79fa86", + "underlay_address": "fd00:1122:3344:10a::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::c]:32345", + "dataset": { + "pool_name": "oxp_cb1052e0-4c70-4d37-b979-dd55e6a25f08" + } + } + }, + "root": "/pool/ext/3c805784-f403-4d01-9eb0-4f77d0821980/crypt/zone" + }, + { + "zone": { + "id": "a109a902-6a27-41b6-a881-c353e28e5389", + "underlay_address": "fd00:1122:3344:10a::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::8]:32345", + "dataset": { + "pool_name": "oxp_d83e36ef-dd7a-4cc2-be19-379b1114c031" + } + } + }, + "root": "/pool/ext/d83e36ef-dd7a-4cc2-be19-379b1114c031/crypt/zone" + }, + { + "zone": { + "id": "d2a9a0bc-ea12-44e3-ac4a-904c76120d11", + "underlay_address": "fd00:1122:3344:10a::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::3]:32345", + "dataset": { + "pool_name": "oxp_c395dcc3-6ece-4b3f-b143-e111a54ef7da" + } + } + }, + "root": "/pool/ext/9898a289-2f0d-43a6-b053-850f6e784e9a/crypt/zone" + }, + { + "zone": { + "id": "b3c3e53b-d9ec-4dd8-bd2c-bd811319aa44", + "underlay_address": "fd00:1122:3344:10a::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10a::5]:32345", + "dataset": { + "pool_name": "oxp_9898a289-2f0d-43a6-b053-850f6e784e9a" + } + } + }, + "root": "/pool/ext/9275d50f-da2c-4f84-9775-598a364309ad/crypt/zone" + }, + { + "zone": { + "id": "7b445d3b-fd25-4538-ac3f-f439c66d1223", + "underlay_address": "fd00:1122:3344:10a::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10a::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/f9fe9ce6-be0d-4974-bc30-78a8f1330496/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled20.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled20.json new file mode 100644 index 0000000000..f02f1f05e5 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled20.json @@ -0,0 +1,198 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "4b49e669-264d-4bfb-8ab1-555b520b679c", + "underlay_address": "fd00:1122:3344:108::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::c]:32345", + "dataset": { + "pool_name": "oxp_799a1c86-9e1a-4626-91e2-a19f7ff5356e" + } + } + }, + "root": "/pool/ext/d2478613-b7c9-4bd3-856f-1fe8e9c903c2/crypt/zone" + }, + { + "zone": { + "id": 
"d802baae-9c3f-437a-85fe-cd72653b6db1", + "underlay_address": "fd00:1122:3344:108::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::5]:32345", + "dataset": { + "pool_name": "oxp_d2478613-b7c9-4bd3-856f-1fe8e9c903c2" + } + } + }, + "root": "/pool/ext/116f216c-e151-410f-82bf-8913904cf7b4/crypt/zone" + }, + { + "zone": { + "id": "e5f69e60-3421-49a4-8c1d-2db8cbb6a5e9", + "underlay_address": "fd00:1122:3344:108::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::b]:32345", + "dataset": { + "pool_name": "oxp_116f216c-e151-410f-82bf-8913904cf7b4" + } + } + }, + "root": "/pool/ext/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone" + }, + { + "zone": { + "id": "3e598962-ef8c-4cb6-bdfe-ec8563939d6a", + "underlay_address": "fd00:1122:3344:108::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::4]:32345", + "dataset": { + "pool_name": "oxp_ababce44-01d1-4c50-b389-f60464c5dde9" + } + } + }, + "root": "/pool/ext/ababce44-01d1-4c50-b389-f60464c5dde9/crypt/zone" + }, + { + "zone": { + "id": "25355c9f-cc2b-4b24-8eaa-65190f8936a8", + "underlay_address": "fd00:1122:3344:108::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::d]:32345", + "dataset": { + "pool_name": "oxp_fed46d41-136d-4462-8782-359014efba59" + } + } + }, + "root": "/pool/ext/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone" + }, + { + "zone": { + "id": "efb2f16c-ebad-4192-b575-dcb4d9b1d5cd", + "underlay_address": "fd00:1122:3344:108::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::a]:32345", + "dataset": { + "pool_name": "oxp_bf509067-0165-456d-98ae-72c86378e626" + } + } + }, + "root": "/pool/ext/95220093-e3b8-4f7f-9f5a-cb32cb75180a/crypt/zone" + }, + { + "zone": { + "id": "89191f0d-4e0b-47fa-9a9e-fbe2a6db1385", + "underlay_address": "fd00:1122:3344:108::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::8]:32345", + "dataset": { + "pool_name": "oxp_eea15142-4635-4e40-b0b4-b0c4f13eca3c" + } + } + }, + "root": "/pool/ext/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone" + }, + { + "zone": { + "id": "e4589324-c528-49c7-9141-35e0a7af6947", + "underlay_address": "fd00:1122:3344:108::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::6]:32345", + "dataset": { + "pool_name": "oxp_95220093-e3b8-4f7f-9f5a-cb32cb75180a" + } + } + }, + "root": "/pool/ext/ababce44-01d1-4c50-b389-f60464c5dde9/crypt/zone" + }, + { + "zone": { + "id": "95ebe94d-0e68-421d-9260-c30bd7fe4bd6", + "underlay_address": "fd00:1122:3344:108::3", + "zone_type": { + "type": "nexus", + "internal_address": "[fd00:1122:3344:108::3]:12221", + "external_ip": "45.154.216.35", + "nic": { + "id": "301aa595-f072-4da3-a533-99647b44a66a", + "kind": { + "type": "service", + "id": "95ebe94d-0e68-421d-9260-c30bd7fe4bd6" + }, + "name": "nexus-95ebe94d-0e68-421d-9260-c30bd7fe4bd6", + "ip": "172.30.2.5", + "mac": "A8:40:25:FF:F1:30", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "8.8.8.8" + ] + } + }, + "root": "/pool/ext/eea15142-4635-4e40-b0b4-b0c4f13eca3c/crypt/zone" + }, + { + "zone": { + "id": "4b7a7052-f8e8-4196-8d6b-315943986ce6", + "underlay_address": "fd00:1122:3344:108::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::7]:32345", + "dataset": { + "pool_name": "oxp_a549421c-2f12-45cc-b691-202f0a9bfa8b" + } + } + }, + "root": 
"/pool/ext/bf509067-0165-456d-98ae-72c86378e626/crypt/zone" + }, + { + "zone": { + "id": "71b8ff53-c781-47bb-8ddc-2c7129680542", + "underlay_address": "fd00:1122:3344:108::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:108::9]:32345", + "dataset": { + "pool_name": "oxp_9d19f891-a3d9-4c6e-b1e1-6b0b085a9440" + } + } + }, + "root": "/pool/ext/fed46d41-136d-4462-8782-359014efba59/crypt/zone" + }, + { + "zone": { + "id": "eaf7bf77-f4c2-4016-9909-4b88a27e9d9a", + "underlay_address": "fd00:1122:3344:108::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:108::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/ababce44-01d1-4c50-b389-f60464c5dde9/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled21.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled21.json new file mode 100644 index 0000000000..d6c19b96ed --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled21.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "a91e4af3-5d18-4b08-8cb6-0583db8f8842", + "underlay_address": "fd00:1122:3344:117::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::a]:32345", + "dataset": { + "pool_name": "oxp_4b2896b8-5f0e-42fb-a474-658b28421e65" + } + } + }, + "root": "/pool/ext/23393ed9-acee-4686-861f-7fc825af1249/crypt/zone" + }, + { + "zone": { + "id": "1ce74512-ce3a-4125-95f1-12c86e0275d5", + "underlay_address": "fd00:1122:3344:117::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::8]:32345", + "dataset": { + "pool_name": "oxp_46ece76f-ef00-4dd0-9f73-326c63959470" + } + } + }, + "root": "/pool/ext/1bd5955e-14a9-463f-adeb-f12bcb45a6c1/crypt/zone" + }, + { + "zone": { + "id": "fef5d35f-9622-4dee-8635-d26e9f7f6869", + "underlay_address": "fd00:1122:3344:117::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::4]:32345", + "dataset": { + "pool_name": "oxp_e4d7c2e8-016b-4617-afb5-38a2d9c1b508" + } + } + }, + "root": "/pool/ext/e372bba3-ef60-466f-b819-a3d5b9acbe77/crypt/zone" + }, + { + "zone": { + "id": "4f024a31-cd38-4219-8381-9f1af70d1d54", + "underlay_address": "fd00:1122:3344:117::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::c]:32345", + "dataset": { + "pool_name": "oxp_7cb2a3c2-9d33-4c6a-af57-669f251cf4cf" + } + } + }, + "root": "/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone" + }, + { + "zone": { + "id": "d00e1d0b-e12f-420a-a4df-21e4cac176f6", + "underlay_address": "fd00:1122:3344:117::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::b]:32345", + "dataset": { + "pool_name": "oxp_e372bba3-ef60-466f-b819-a3d5b9acbe77" + } + } + }, + "root": "/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone" + }, + { + "zone": { + "id": "1598058a-6064-449e-b39c-1e3d345ed793", + "underlay_address": "fd00:1122:3344:117::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::5]:32345", + "dataset": { + "pool_name": "oxp_022a8d67-1e00-49f3-81ed-a0a1bc187cfa" + } + } + }, + "root": "/pool/ext/022a8d67-1e00-49f3-81ed-a0a1bc187cfa/crypt/zone" + }, + { + "zone": { + "id": 
"c723c4b8-3031-4b25-8c16-fe08bc0b5f00", + "underlay_address": "fd00:1122:3344:117::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::7]:32345", + "dataset": { + "pool_name": "oxp_23393ed9-acee-4686-861f-7fc825af1249" + } + } + }, + "root": "/pool/ext/1bd5955e-14a9-463f-adeb-f12bcb45a6c1/crypt/zone" + }, + { + "zone": { + "id": "7751b307-888f-46c8-8787-75d2f3fdaef3", + "underlay_address": "fd00:1122:3344:117::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::9]:32345", + "dataset": { + "pool_name": "oxp_e54e53d4-f68f-4b19-b8c1-9d5ab42e51c1" + } + } + }, + "root": "/pool/ext/e372bba3-ef60-466f-b819-a3d5b9acbe77/crypt/zone" + }, + { + "zone": { + "id": "89413ff1-d5de-4931-8389-e84e7ea321af", + "underlay_address": "fd00:1122:3344:117::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::6]:32345", + "dataset": { + "pool_name": "oxp_1bd5955e-14a9-463f-adeb-f12bcb45a6c1" + } + } + }, + "root": "/pool/ext/1bd5955e-14a9-463f-adeb-f12bcb45a6c1/crypt/zone" + }, + { + "zone": { + "id": "287b0b24-72aa-41b5-a597-8523d84225ef", + "underlay_address": "fd00:1122:3344:117::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:117::3]:32345", + "dataset": { + "pool_name": "oxp_cfbd185d-e185-4aaa-a598-9216124ceec4" + } + } + }, + "root": "/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone" + }, + { + "zone": { + "id": "4728253e-c534-4a5b-b707-c64ac9a8eb8c", + "underlay_address": "fd00:1122:3344:117::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:117::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/cfbd185d-e185-4aaa-a598-9216124ceec4/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled22.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled22.json new file mode 100644 index 0000000000..1cd6fed362 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled22.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "49f20cd1-a8a3-4fa8-9209-59da60cd8f9b", + "underlay_address": "fd00:1122:3344:103::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::5]:32345", + "dataset": { + "pool_name": "oxp_13a9ef4a-f33a-4781-8f83-712c07a79b1f" + } + } + }, + "root": "/pool/ext/711eff4e-736c-478e-83aa-ae86f5efbf1d/crypt/zone" + }, + { + "zone": { + "id": "896fd564-f94e-496b-9fcf-ddfbfcfac9f7", + "underlay_address": "fd00:1122:3344:103::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::c]:32345", + "dataset": { + "pool_name": "oxp_0944c0a2-0fb7-4f51-bced-52cc257cd2f6" + } + } + }, + "root": "/pool/ext/bc54d8c5-955d-429d-84e0-a20a4e5e27a3/crypt/zone" + }, + { + "zone": { + "id": "911fb8b3-05c2-4af7-8974-6c74a61d94ad", + "underlay_address": "fd00:1122:3344:103::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::9]:32345", + "dataset": { + "pool_name": "oxp_29f59fce-a867-4571-9d2e-b03fa5c13510" + } + } + }, + "root": "/pool/ext/711eff4e-736c-478e-83aa-ae86f5efbf1d/crypt/zone" + }, + { + "zone": { + "id": "682b34db-0b06-4770-a8fe-74437cf184d6", + "underlay_address": "fd00:1122:3344:103::6", + 
"zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::6]:32345", + "dataset": { + "pool_name": "oxp_094d11d2-8049-4138-bcf4-562f5f8e77c0" + } + } + }, + "root": "/pool/ext/0944c0a2-0fb7-4f51-bced-52cc257cd2f6/crypt/zone" + }, + { + "zone": { + "id": "d8d20365-ecd3-4fd5-9495-c0670e3bd5d9", + "underlay_address": "fd00:1122:3344:103::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::a]:32345", + "dataset": { + "pool_name": "oxp_fb97ff7b-0225-400c-a137-3b38a786c0a0" + } + } + }, + "root": "/pool/ext/094d11d2-8049-4138-bcf4-562f5f8e77c0/crypt/zone" + }, + { + "zone": { + "id": "673620b6-44d9-4310-8e17-3024ac84e708", + "underlay_address": "fd00:1122:3344:103::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::7]:32345", + "dataset": { + "pool_name": "oxp_711eff4e-736c-478e-83aa-ae86f5efbf1d" + } + } + }, + "root": "/pool/ext/fb97ff7b-0225-400c-a137-3b38a786c0a0/crypt/zone" + }, + { + "zone": { + "id": "bf6dfc04-4d4c-41b6-a011-40ffc3bc5080", + "underlay_address": "fd00:1122:3344:103::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::8]:32345", + "dataset": { + "pool_name": "oxp_f815f1b6-48ef-436d-8768-eb08227e2386" + } + } + }, + "root": "/pool/ext/13a9ef4a-f33a-4781-8f83-712c07a79b1f/crypt/zone" + }, + { + "zone": { + "id": "ac8a82a8-fb6f-4635-a9a9-d98617eab390", + "underlay_address": "fd00:1122:3344:103::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::3]:32345", + "dataset": { + "pool_name": "oxp_97d6c860-4e2f-496e-974b-2e293fee6af9" + } + } + }, + "root": "/pool/ext/0944c0a2-0fb7-4f51-bced-52cc257cd2f6/crypt/zone" + }, + { + "zone": { + "id": "4ed66558-4815-4b85-9b94-9edf3ee69ead", + "underlay_address": "fd00:1122:3344:103::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::4]:32345", + "dataset": { + "pool_name": "oxp_bc54d8c5-955d-429d-84e0-a20a4e5e27a3" + } + } + }, + "root": "/pool/ext/13a9ef4a-f33a-4781-8f83-712c07a79b1f/crypt/zone" + }, + { + "zone": { + "id": "8a71c6ee-b08d-4c3d-b13c-c9cebc4c328a", + "underlay_address": "fd00:1122:3344:103::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::b]:32345", + "dataset": { + "pool_name": "oxp_2bdfa429-09bd-4fa1-aa20-eea99f0d2b85" + } + } + }, + "root": "/pool/ext/29f59fce-a867-4571-9d2e-b03fa5c13510/crypt/zone" + }, + { + "zone": { + "id": "7e6b8962-7a1e-4d7b-b7ea-49e64a51d98d", + "underlay_address": "fd00:1122:3344:103::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:103::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/2bdfa429-09bd-4fa1-aa20-eea99f0d2b85/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled23.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled23.json new file mode 100644 index 0000000000..ab171ad8cd --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled23.json @@ -0,0 +1,181 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "6b7e931d-4b91-4dc6-9a7b-4c19ac669e5d", + "underlay_address": "fd00:1122:3344:105::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::4]:32345", + "dataset": { 
+ "pool_name": "oxp_24dab7f5-164a-47f3-a878-f32ab1e68cce" + } + } + }, + "root": "/pool/ext/ad493851-2d11-4c2d-8d75-989579d9616a/crypt/zone" + }, + { + "zone": { + "id": "6c58e7aa-71e1-4868-9d4b-e12c7ef40303", + "underlay_address": "fd00:1122:3344:105::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::a]:32345", + "dataset": { + "pool_name": "oxp_d664c9e8-bc81-4225-a618-a8ae2d057186" + } + } + }, + "root": "/pool/ext/ad493851-2d11-4c2d-8d75-989579d9616a/crypt/zone" + }, + { + "zone": { + "id": "51c6dc8d-b1a4-454a-9b19-01e45eb0b599", + "underlay_address": "fd00:1122:3344:105::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::d]:32345", + "dataset": { + "pool_name": "oxp_f5f85537-eb25-4d0e-8e94-b775c41abd73" + } + } + }, + "root": "/pool/ext/4f1eafe9-b28d-49d3-83e2-ceac8721d6b5/crypt/zone" + }, + { + "zone": { + "id": "8cbffa61-0bd0-4ad2-bd7d-30fe0dd57469", + "underlay_address": "fd00:1122:3344:105::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::9]:32345", + "dataset": { + "pool_name": "oxp_88abca38-3f61-4d4b-80a1-4ea3e4827f84" + } + } + }, + "root": "/pool/ext/88abca38-3f61-4d4b-80a1-4ea3e4827f84/crypt/zone" + }, + { + "zone": { + "id": "2177f37f-2ac9-4e66-bf74-a10bd91f4d33", + "underlay_address": "fd00:1122:3344:105::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::6]:32345", + "dataset": { + "pool_name": "oxp_59e20871-4670-40d6-8ff4-aa97899fc991" + } + } + }, + "root": "/pool/ext/4f1eafe9-b28d-49d3-83e2-ceac8721d6b5/crypt/zone" + }, + { + "zone": { + "id": "e4e43855-4879-4910-a2ba-40f625c1cc2d", + "underlay_address": "fd00:1122:3344:105::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::b]:32345", + "dataset": { + "pool_name": "oxp_967d2f05-b141-44f5-837d-9b2aa67ee128" + } + } + }, + "root": "/pool/ext/6b6f34cd-6d3d-4832-a4e6-3df112c97133/crypt/zone" + }, + { + "zone": { + "id": "8d2517e1-f9ad-40f2-abb9-2f5122839910", + "underlay_address": "fd00:1122:3344:105::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::7]:32345", + "dataset": { + "pool_name": "oxp_ad493851-2d11-4c2d-8d75-989579d9616a" + } + } + }, + "root": "/pool/ext/88abca38-3f61-4d4b-80a1-4ea3e4827f84/crypt/zone" + }, + { + "zone": { + "id": "44cb3698-a7b1-4388-9165-ac76082ec8bc", + "underlay_address": "fd00:1122:3344:105::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::5]:32345", + "dataset": { + "pool_name": "oxp_4292a83c-8c1f-4b2e-9120-72e0c510bf3c" + } + } + }, + "root": "/pool/ext/24dab7f5-164a-47f3-a878-f32ab1e68cce/crypt/zone" + }, + { + "zone": { + "id": "931b5c86-9d72-4518-bfd6-97863152ac65", + "underlay_address": "fd00:1122:3344:105::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::c]:32345", + "dataset": { + "pool_name": "oxp_6b6f34cd-6d3d-4832-a4e6-3df112c97133" + } + } + }, + "root": "/pool/ext/ad493851-2d11-4c2d-8d75-989579d9616a/crypt/zone" + }, + { + "zone": { + "id": "ac568073-1889-463e-8cc4-cfed16ce2a34", + "underlay_address": "fd00:1122:3344:105::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:105::8]:32345", + "dataset": { + "pool_name": "oxp_4f1eafe9-b28d-49d3-83e2-ceac8721d6b5" + } + } + }, + "root": "/pool/ext/4292a83c-8c1f-4b2e-9120-72e0c510bf3c/crypt/zone" + }, + { + "zone": { + "id": "e8f86fbb-864e-4d5a-961c-b50b54ae853e", + "underlay_address": "fd00:1122:3344:105::3", + "zone_type": { + "type": "cockroach_db", + "address": 
"[fd00:1122:3344:105::3]:32221", + "dataset": { + "pool_name": "oxp_24dab7f5-164a-47f3-a878-f32ab1e68cce" + } + } + }, + "root": "/pool/ext/4f1eafe9-b28d-49d3-83e2-ceac8721d6b5/crypt/zone" + }, + { + "zone": { + "id": "c79caea0-37b1-49d6-ae6e-8cf849d91374", + "underlay_address": "fd00:1122:3344:105::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:105::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/24dab7f5-164a-47f3-a878-f32ab1e68cce/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled24.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled24.json new file mode 100644 index 0000000000..9968abe6d9 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled24.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "d2b1e468-bc3c-4d08-b855-ae3327465375", + "underlay_address": "fd00:1122:3344:106::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::3]:32345", + "dataset": { + "pool_name": "oxp_9db196bf-828d-4e55-a2c1-dd9d579d3908" + } + } + }, + "root": "/pool/ext/74df4c92-edbb-4431-a770-1d015110e66b/crypt/zone" + }, + { + "zone": { + "id": "61f94a16-79fd-42e3-b225-a4dc67228437", + "underlay_address": "fd00:1122:3344:106::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::6]:32345", + "dataset": { + "pool_name": "oxp_d77d5b08-5f70-496a-997b-b38804dc3b8a" + } + } + }, + "root": "/pool/ext/daf9e3cd-5a40-4eba-a0f6-4f94dab37dae/crypt/zone" + }, + { + "zone": { + "id": "7d32ef34-dec5-4fd8-899e-20bbc473a3ee", + "underlay_address": "fd00:1122:3344:106::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::7]:32345", + "dataset": { + "pool_name": "oxp_50c1b653-6231-41fe-b3cf-b7ba709a0746" + } + } + }, + "root": "/pool/ext/9db196bf-828d-4e55-a2c1-dd9d579d3908/crypt/zone" + }, + { + "zone": { + "id": "c34b7ae5-26b9-4651-a3c4-20bba2bd0d2c", + "underlay_address": "fd00:1122:3344:106::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::5]:32345", + "dataset": { + "pool_name": "oxp_88aea92c-ab92-44c1-9471-eb8e30e075d3" + } + } + }, + "root": "/pool/ext/8da316d4-6b18-4980-a0a8-6e76e72cc40d/crypt/zone" + }, + { + "zone": { + "id": "36472be8-9a70-4c14-bd02-439b725cec1a", + "underlay_address": "fd00:1122:3344:106::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::8]:32345", + "dataset": { + "pool_name": "oxp_54544b3a-1513-4db2-911e-7c1eb4b12385" + } + } + }, + "root": "/pool/ext/54544b3a-1513-4db2-911e-7c1eb4b12385/crypt/zone" + }, + { + "zone": { + "id": "2548f8ab-5255-4334-a1fb-5d7d95213129", + "underlay_address": "fd00:1122:3344:106::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::9]:32345", + "dataset": { + "pool_name": "oxp_08050450-967f-431c-9a12-0d051aff020e" + } + } + }, + "root": "/pool/ext/08050450-967f-431c-9a12-0d051aff020e/crypt/zone" + }, + { + "zone": { + "id": "1455c069-853c-49cd-853a-3ea81b89acd4", + "underlay_address": "fd00:1122:3344:106::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::c]:32345", + "dataset": { + "pool_name": 
"oxp_8da316d4-6b18-4980-a0a8-6e76e72cc40d" + } + } + }, + "root": "/pool/ext/08050450-967f-431c-9a12-0d051aff020e/crypt/zone" + }, + { + "zone": { + "id": "27c0244b-f91a-46c3-bc96-e8eec009371e", + "underlay_address": "fd00:1122:3344:106::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::b]:32345", + "dataset": { + "pool_name": "oxp_daf9e3cd-5a40-4eba-a0f6-4f94dab37dae" + } + } + }, + "root": "/pool/ext/74df4c92-edbb-4431-a770-1d015110e66b/crypt/zone" + }, + { + "zone": { + "id": "9e46d837-1e0f-42b6-a352-84e6946b8734", + "underlay_address": "fd00:1122:3344:106::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::4]:32345", + "dataset": { + "pool_name": "oxp_74df4c92-edbb-4431-a770-1d015110e66b" + } + } + }, + "root": "/pool/ext/15f94c39-d48c-41f6-a913-cc1d04aef1a2/crypt/zone" + }, + { + "zone": { + "id": "b972fcd4-c1b3-4b3c-9e24-f59c7a7cb192", + "underlay_address": "fd00:1122:3344:106::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:106::a]:32345", + "dataset": { + "pool_name": "oxp_15f94c39-d48c-41f6-a913-cc1d04aef1a2" + } + } + }, + "root": "/pool/ext/74df4c92-edbb-4431-a770-1d015110e66b/crypt/zone" + }, + { + "zone": { + "id": "e1c8c655-1950-42d5-ae1f-a4ce84854bbc", + "underlay_address": "fd00:1122:3344:106::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:106::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/15f94c39-d48c-41f6-a913-cc1d04aef1a2/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled25.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled25.json new file mode 100644 index 0000000000..8deca6b56a --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled25.json @@ -0,0 +1,196 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "10b80058-9b2e-4d6c-8a1a-a61a8258c12f", + "underlay_address": "fd00:1122:3344:118::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::9]:32345", + "dataset": { + "pool_name": "oxp_953c19bb-9fff-4488-8a7b-29de9994a948" + } + } + }, + "root": "/pool/ext/a78caf97-6145-4908-83b5-a03a6d2e0ac4/crypt/zone" + }, + { + "zone": { + "id": "f58fef96-7b5e-40c2-9482-669088a19209", + "underlay_address": "fd00:1122:3344:118::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::d]:32345", + "dataset": { + "pool_name": "oxp_d7976706-d6ed-4465-8b04-450c96d8feec" + } + } + }, + "root": "/pool/ext/d7976706-d6ed-4465-8b04-450c96d8feec/crypt/zone" + }, + { + "zone": { + "id": "624f1168-47b6-4aa1-84da-e20a0d74d783", + "underlay_address": "fd00:1122:3344:118::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::b]:32345", + "dataset": { + "pool_name": "oxp_a78caf97-6145-4908-83b5-a03a6d2e0ac4" + } + } + }, + "root": "/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone" + }, + { + "zone": { + "id": "8ea85412-19b4-45c1-a53c-027ddd629296", + "underlay_address": "fd00:1122:3344:118::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::6]:32345", + "dataset": { + "pool_name": "oxp_d5f4c903-155a-4c91-aadd-6039a4f64821" + } + } + }, + "root": 
"/pool/ext/7d2a7685-c1c9-4d2d-a2bb-df65d96ea3e2/crypt/zone" + }, + { + "zone": { + "id": "fd226b82-71d7-4719-b32c-a6c7abe28a2a", + "underlay_address": "fd00:1122:3344:118::3", + "zone_type": { + "type": "external_dns", + "dataset": { + "pool_name": "oxp_84a80b58-70e9-439c-9558-5b343d9a4b53" + }, + "http_address": "[fd00:1122:3344:118::3]:5353", + "dns_address": "45.154.216.34:53", + "nic": { + "id": "7f72b6fd-1120-44dc-b3a7-f727502ba47c", + "kind": { + "type": "service", + "id": "fd226b82-71d7-4719-b32c-a6c7abe28a2a" + }, + "name": "external-dns-fd226b82-71d7-4719-b32c-a6c7abe28a2a", + "ip": "172.30.1.6", + "mac": "A8:40:25:FF:9E:D1", + "subnet": "172.30.1.0/24", + "vni": 100, + "primary": true, + "slot": 0 + } + } + }, + "root": "/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone" + }, + { + "zone": { + "id": "08d0c38d-f0d9-45b9-856d-b85059fe5f07", + "underlay_address": "fd00:1122:3344:118::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::4]:32345", + "dataset": { + "pool_name": "oxp_84a80b58-70e9-439c-9558-5b343d9a4b53" + } + } + }, + "root": "/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone" + }, + { + "zone": { + "id": "5de7d3fd-4a3f-4fdd-b6b2-d1186e16dce5", + "underlay_address": "fd00:1122:3344:118::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::7]:32345", + "dataset": { + "pool_name": "oxp_d76e058f-2d1e-4b15-b3a0-e5509a246876" + } + } + }, + "root": "/pool/ext/a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d/crypt/zone" + }, + { + "zone": { + "id": "5d0f5cad-10b3-497c-903b-eeeabce920e2", + "underlay_address": "fd00:1122:3344:118::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::8]:32345", + "dataset": { + "pool_name": "oxp_3a3ad639-8800-4951-bc2a-201d269e47a2" + } + } + }, + "root": "/pool/ext/3a3ad639-8800-4951-bc2a-201d269e47a2/crypt/zone" + }, + { + "zone": { + "id": "39f9cefa-801c-4843-9fb9-05446ffbdd1a", + "underlay_address": "fd00:1122:3344:118::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::a]:32345", + "dataset": { + "pool_name": "oxp_7d2a7685-c1c9-4d2d-a2bb-df65d96ea3e2" + } + } + }, + "root": "/pool/ext/a78caf97-6145-4908-83b5-a03a6d2e0ac4/crypt/zone" + }, + { + "zone": { + "id": "0711e710-7fdd-4e68-94c8-294b8677e804", + "underlay_address": "fd00:1122:3344:118::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::5]:32345", + "dataset": { + "pool_name": "oxp_a5b16ffe-a834-4a83-a4e9-487d4cbb7e3d" + } + } + }, + "root": "/pool/ext/3a3ad639-8800-4951-bc2a-201d269e47a2/crypt/zone" + }, + { + "zone": { + "id": "318a62cc-5c6c-4805-9fb6-c0f6a75ce31c", + "underlay_address": "fd00:1122:3344:118::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:118::c]:32345", + "dataset": { + "pool_name": "oxp_1d5f0ba3-6b31-4cea-a9a9-2065a538887d" + } + } + }, + "root": "/pool/ext/d7976706-d6ed-4465-8b04-450c96d8feec/crypt/zone" + }, + { + "zone": { + "id": "463d0498-85b9-40eb-af96-d99af58a587c", + "underlay_address": "fd00:1122:3344:118::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:118::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/d5f4c903-155a-4c91-aadd-6039a4f64821/crypt/zone" + } + ] +} \ No newline at end of file 
diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled26.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled26.json new file mode 100644 index 0000000000..a3c5d97b53 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled26.json @@ -0,0 +1,178 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "d8b3de97-cc79-48f6-83ad-02017c21223b", + "underlay_address": "fd00:1122:3344:119::3", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:119::3]:17000" + } + }, + "root": "/pool/ext/e0faea44-8b5c-40b0-bb75-a1aec1a10377/crypt/zone" + }, + { + "zone": { + "id": "adba1a3b-5bac-44d5-aa5a-879dc6eadb5f", + "underlay_address": "fd00:1122:3344:119::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::c]:32345", + "dataset": { + "pool_name": "oxp_21c339c3-6461-4bdb-8b0e-c0f9f08ee10b" + } + } + }, + "root": "/pool/ext/f5c73c28-2168-4321-b737-4ca6663155c9/crypt/zone" + }, + { + "zone": { + "id": "42bb9833-5c39-4aba-b2c4-da2ca1287728", + "underlay_address": "fd00:1122:3344:119::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::a]:32345", + "dataset": { + "pool_name": "oxp_1f91451d-a466-4c9a-a6e6-0abd7985595f" + } + } + }, + "root": "/pool/ext/21c339c3-6461-4bdb-8b0e-c0f9f08ee10b/crypt/zone" + }, + { + "zone": { + "id": "197695e1-d949-4982-b679-6e5c9ab4bcc7", + "underlay_address": "fd00:1122:3344:119::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::b]:32345", + "dataset": { + "pool_name": "oxp_e0faea44-8b5c-40b0-bb75-a1aec1a10377" + } + } + }, + "root": "/pool/ext/b31e1815-cae0-4145-940c-874fff63bdd5/crypt/zone" + }, + { + "zone": { + "id": "bf99d4f8-edf1-4de5-98d4-8e6a24965005", + "underlay_address": "fd00:1122:3344:119::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::8]:32345", + "dataset": { + "pool_name": "oxp_ef2c3afb-6962-4f6b-b567-14766bbd9ec0" + } + } + }, + "root": "/pool/ext/21c339c3-6461-4bdb-8b0e-c0f9f08ee10b/crypt/zone" + }, + { + "zone": { + "id": "390d1853-8be9-4987-b8b6-f022999bf4e7", + "underlay_address": "fd00:1122:3344:119::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::7]:32345", + "dataset": { + "pool_name": "oxp_06eed00a-d8d3-4b9d-84c9-23fce535f63e" + } + } + }, + "root": "/pool/ext/ef2c3afb-6962-4f6b-b567-14766bbd9ec0/crypt/zone" + }, + { + "zone": { + "id": "76fe2161-90df-41b5-9c94-067de9c29db1", + "underlay_address": "fd00:1122:3344:119::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::4]:32345", + "dataset": { + "pool_name": "oxp_f5c73c28-2168-4321-b737-4ca6663155c9" + } + } + }, + "root": "/pool/ext/ef2c3afb-6962-4f6b-b567-14766bbd9ec0/crypt/zone" + }, + { + "zone": { + "id": "f49dc522-2b13-4055-964c-8315671096aa", + "underlay_address": "fd00:1122:3344:119::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::d]:32345", + "dataset": { + "pool_name": "oxp_662c278b-7f5f-4c7e-91ff-70207e8a307b" + } + } + }, + "root": "/pool/ext/1f91451d-a466-4c9a-a6e6-0abd7985595f/crypt/zone" + }, + { + "zone": { + "id": "08cc7bd6-368e-4d16-a619-28b17eff35af", + "underlay_address": "fd00:1122:3344:119::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::9]:32345", + "dataset": { + "pool_name": "oxp_5516b9ac-b139-40da-aa3b-f094568ba095" + } + } + }, + "root": "/pool/ext/06eed00a-d8d3-4b9d-84c9-23fce535f63e/crypt/zone" + }, + { + "zone": { + "id": 
"74b0613f-bce8-4922-93e0-b5bfccfc8443", + "underlay_address": "fd00:1122:3344:119::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::5]:32345", + "dataset": { + "pool_name": "oxp_b31e1815-cae0-4145-940c-874fff63bdd5" + } + } + }, + "root": "/pool/ext/21c339c3-6461-4bdb-8b0e-c0f9f08ee10b/crypt/zone" + }, + { + "zone": { + "id": "55fcfc62-8435-475f-a2aa-29373901b993", + "underlay_address": "fd00:1122:3344:119::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:119::6]:32345", + "dataset": { + "pool_name": "oxp_eadf6a03-1028-4d48-ac0d-0d27ef2c8c0f" + } + } + }, + "root": "/pool/ext/1f91451d-a466-4c9a-a6e6-0abd7985595f/crypt/zone" + }, + { + "zone": { + "id": "d52ccea3-6d7f-43a6-a19f-e0409f4e9cdc", + "underlay_address": "fd00:1122:3344:119::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:119::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/f5c73c28-2168-4321-b737-4ca6663155c9/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled27.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled27.json new file mode 100644 index 0000000000..193df7a567 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled27.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "095e612f-e218-4a16-aa6e-98c3d69a470a", + "underlay_address": "fd00:1122:3344:10d::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::a]:32345", + "dataset": { + "pool_name": "oxp_9f657858-623f-4d78-9841-6e620b5ede30" + } + } + }, + "root": "/pool/ext/2d086b51-2b77-4bc7-adc6-43586ea38ce9/crypt/zone" + }, + { + "zone": { + "id": "de818730-0e3b-4567-94e7-344bd9b6f564", + "underlay_address": "fd00:1122:3344:10d::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::3]:32345", + "dataset": { + "pool_name": "oxp_ba6ab301-07e1-4d35-80ac-59612f2c2bdb" + } + } + }, + "root": "/pool/ext/7cee2806-e898-47d8-b568-e276a6e271f8/crypt/zone" + }, + { + "zone": { + "id": "6a21dc3c-3a9d-4520-9a91-7d8f2737bcd4", + "underlay_address": "fd00:1122:3344:10d::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::4]:32345", + "dataset": { + "pool_name": "oxp_7cee2806-e898-47d8-b568-e276a6e271f8" + } + } + }, + "root": "/pool/ext/cef23d87-31ed-40d5-99b8-12d7be8e46e7/crypt/zone" + }, + { + "zone": { + "id": "e01b7f45-b8d7-4944-ba5b-41fb699889a9", + "underlay_address": "fd00:1122:3344:10d::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::b]:32345", + "dataset": { + "pool_name": "oxp_d9af8878-50bd-4425-95d9-e6556ce92cfa" + } + } + }, + "root": "/pool/ext/6fe9bcaa-88cb-451d-b086-24a3ad53fa22/crypt/zone" + }, + { + "zone": { + "id": "4271ef62-d319-4e80-b157-915321cec8c7", + "underlay_address": "fd00:1122:3344:10d::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::c]:32345", + "dataset": { + "pool_name": "oxp_ba8ee7dd-cdfb-48bd-92ce-4dc45e070930" + } + } + }, + "root": "/pool/ext/9f657858-623f-4d78-9841-6e620b5ede30/crypt/zone" + }, + { + "zone": { + "id": "6bdcc159-aeb9-4903-9486-dd8b43a3dc16", + "underlay_address": "fd00:1122:3344:10d::8", + 
"zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::8]:32345", + "dataset": { + "pool_name": "oxp_5b03a5dc-bb5a-4bf4-bc21-0af849cd1dab" + } + } + }, + "root": "/pool/ext/d9af8878-50bd-4425-95d9-e6556ce92cfa/crypt/zone" + }, + { + "zone": { + "id": "85540e54-cdd7-4baa-920c-5cf54cbc1f83", + "underlay_address": "fd00:1122:3344:10d::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::7]:32345", + "dataset": { + "pool_name": "oxp_ee24f9a6-84ab-49a5-a28f-e394abfcaa95" + } + } + }, + "root": "/pool/ext/9f657858-623f-4d78-9841-6e620b5ede30/crypt/zone" + }, + { + "zone": { + "id": "750d1a0b-6a14-46c5-9a0b-a504caefb198", + "underlay_address": "fd00:1122:3344:10d::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::9]:32345", + "dataset": { + "pool_name": "oxp_cef23d87-31ed-40d5-99b8-12d7be8e46e7" + } + } + }, + "root": "/pool/ext/ba8ee7dd-cdfb-48bd-92ce-4dc45e070930/crypt/zone" + }, + { + "zone": { + "id": "b5996893-1a9a-434e-a257-d702694f058b", + "underlay_address": "fd00:1122:3344:10d::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::6]:32345", + "dataset": { + "pool_name": "oxp_2d086b51-2b77-4bc7-adc6-43586ea38ce9" + } + } + }, + "root": "/pool/ext/7cee2806-e898-47d8-b568-e276a6e271f8/crypt/zone" + }, + { + "zone": { + "id": "8b36686a-b98d-451a-9124-a3583000a83a", + "underlay_address": "fd00:1122:3344:10d::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10d::5]:32345", + "dataset": { + "pool_name": "oxp_6fe9bcaa-88cb-451d-b086-24a3ad53fa22" + } + } + }, + "root": "/pool/ext/9f657858-623f-4d78-9841-6e620b5ede30/crypt/zone" + }, + { + "zone": { + "id": "88d695a2-c8c1-41af-85b0-77424f4d650d", + "underlay_address": "fd00:1122:3344:10d::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10d::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/ba6ab301-07e1-4d35-80ac-59612f2c2bdb/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled28.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled28.json new file mode 100644 index 0000000000..210b388a19 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled28.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "a126365d-f459-43bf-9f99-dbe1c4cdecf8", + "underlay_address": "fd00:1122:3344:113::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::4]:32345", + "dataset": { + "pool_name": "oxp_c99eabb2-6815-416a-9660-87e2609b357a" + } + } + }, + "root": "/pool/ext/6461a450-f043-4d1e-bc03-4a68ed5fe94a/crypt/zone" + }, + { + "zone": { + "id": "52f57ef8-546a-43bd-a0f3-8c42b99c37a6", + "underlay_address": "fd00:1122:3344:113::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::3]:32345", + "dataset": { + "pool_name": "oxp_f6530e9c-6d64-44fa-93d5-ae427916fbf1" + } + } + }, + "root": "/pool/ext/97662260-6b62-450f-9d7e-42f7dee5d568/crypt/zone" + }, + { + "zone": { + "id": "3ee87855-9423-43ff-800a-fa4fdbf1d956", + "underlay_address": "fd00:1122:3344:113::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::a]:32345", + "dataset": { 
+ "pool_name": "oxp_6461a450-f043-4d1e-bc03-4a68ed5fe94a" + } + } + }, + "root": "/pool/ext/9515dc86-fe62-4d4f-b38d-b3461cc042fc/crypt/zone" + }, + { + "zone": { + "id": "55d0ddf9-9b24-4a7a-b97f-248e240f9ba6", + "underlay_address": "fd00:1122:3344:113::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::5]:32345", + "dataset": { + "pool_name": "oxp_97662260-6b62-450f-9d7e-42f7dee5d568" + } + } + }, + "root": "/pool/ext/9515dc86-fe62-4d4f-b38d-b3461cc042fc/crypt/zone" + }, + { + "zone": { + "id": "014cad37-56a7-4b2a-9c9e-505b15b4de85", + "underlay_address": "fd00:1122:3344:113::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::b]:32345", + "dataset": { + "pool_name": "oxp_8529ce8e-21d2-4b23-b9fd-6b90c7ae4f90" + } + } + }, + "root": "/pool/ext/6461a450-f043-4d1e-bc03-4a68ed5fe94a/crypt/zone" + }, + { + "zone": { + "id": "e14fb192-aaab-42ab-aa86-c85f13955940", + "underlay_address": "fd00:1122:3344:113::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::6]:32345", + "dataset": { + "pool_name": "oxp_5a9455ca-fb01-4549-9a70-7579c031779d" + } + } + }, + "root": "/pool/ext/f6530e9c-6d64-44fa-93d5-ae427916fbf1/crypt/zone" + }, + { + "zone": { + "id": "14540609-9371-442b-8486-88c244e97cd4", + "underlay_address": "fd00:1122:3344:113::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::8]:32345", + "dataset": { + "pool_name": "oxp_2916d6f3-8775-4887-a6d3-f9723982756f" + } + } + }, + "root": "/pool/ext/8529ce8e-21d2-4b23-b9fd-6b90c7ae4f90/crypt/zone" + }, + { + "zone": { + "id": "97a6b35f-0af9-41eb-93a1-f8bc5dbba357", + "underlay_address": "fd00:1122:3344:113::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::7]:32345", + "dataset": { + "pool_name": "oxp_9515dc86-fe62-4d4f-b38d-b3461cc042fc" + } + } + }, + "root": "/pool/ext/8529ce8e-21d2-4b23-b9fd-6b90c7ae4f90/crypt/zone" + }, + { + "zone": { + "id": "5734aa24-cb66-4b0a-9eb2-564646f8d729", + "underlay_address": "fd00:1122:3344:113::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::9]:32345", + "dataset": { + "pool_name": "oxp_9f889a6c-17b1-4edd-9659-458d91439dc1" + } + } + }, + "root": "/pool/ext/a5074e7f-8d3b-40e0-a79e-dbd9af9d5693/crypt/zone" + }, + { + "zone": { + "id": "ba86eca1-1427-4540-b4a6-1d9a0e1bc656", + "underlay_address": "fd00:1122:3344:113::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:113::c]:32345", + "dataset": { + "pool_name": "oxp_a5074e7f-8d3b-40e0-a79e-dbd9af9d5693" + } + } + }, + "root": "/pool/ext/2916d6f3-8775-4887-a6d3-f9723982756f/crypt/zone" + }, + { + "zone": { + "id": "6634dbc4-d22f-40a4-8cd3-4f271d781fa1", + "underlay_address": "fd00:1122:3344:113::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:113::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/a5074e7f-8d3b-40e0-a79e-dbd9af9d5693/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled29.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled29.json new file mode 100644 index 0000000000..ccd1bd65be --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled29.json @@ -0,0 +1,184 @@ +{ + 
"omicron_generation": 2, + "ledger_generation": 5, + "zones": [ + { + "zone": { + "id": "1cdd1ebf-9321-4f2d-914c-1e617f60b41a", + "underlay_address": "fd00:1122:3344:120::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::8]:32345", + "dataset": { + "pool_name": "oxp_74046573-78a2-46b4-86dc-40bb2ee29dd5" + } + } + }, + "root": "/pool/ext/c1f0a9e4-ea10-4fd9-8b6d-79a2bacfec5e/crypt/zone" + }, + { + "zone": { + "id": "720a0d08-d1c0-43ba-af86-f2dac1a53639", + "underlay_address": "fd00:1122:3344:120::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::c]:32345", + "dataset": { + "pool_name": "oxp_068d2790-1044-41ed-97a5-b493490b14d1" + } + } + }, + "root": "/pool/ext/86cd16cf-d00d-40bc-b14a-8220b1e11476/crypt/zone" + }, + { + "zone": { + "id": "d9f0b97b-2cef-4155-b45f-7db89263e4cf", + "underlay_address": "fd00:1122:3344:120::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::9]:32345", + "dataset": { + "pool_name": "oxp_8171bf0d-e61e-43f9-87d6-ec8833b80102" + } + } + }, + "root": "/pool/ext/86cd16cf-d00d-40bc-b14a-8220b1e11476/crypt/zone" + }, + { + "zone": { + "id": "018edff1-0d95-45a3-9a01-39c419bec55a", + "underlay_address": "fd00:1122:3344:120::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::b]:32345", + "dataset": { + "pool_name": "oxp_0b11e026-f265-49a0-935f-7b234c19c789" + } + } + }, + "root": "/pool/ext/35db8700-d6a7-498c-9d2c-08eb9ab41b7c/crypt/zone" + }, + { + "zone": { + "id": "f8cc1c1e-a556-436c-836d-42052101c38a", + "underlay_address": "fd00:1122:3344:120::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::3]:32345", + "dataset": { + "pool_name": "oxp_ed8e5a26-5591-405a-b792-408f5b16e444" + } + } + }, + "root": "/pool/ext/1069bdee-fe5a-4164-a856-ff8ae56c07fb/crypt/zone" + }, + { + "zone": { + "id": "f9600313-fac0-45a1-a1b5-02dd6af468b9", + "underlay_address": "fd00:1122:3344:120::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::4]:32345", + "dataset": { + "pool_name": "oxp_c1f0a9e4-ea10-4fd9-8b6d-79a2bacfec5e" + } + } + }, + "root": "/pool/ext/74046573-78a2-46b4-86dc-40bb2ee29dd5/crypt/zone" + }, + { + "zone": { + "id": "869e4f7c-5312-4b98-bacc-1508f236bf5a", + "underlay_address": "fd00:1122:3344:120::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::6]:32345", + "dataset": { + "pool_name": "oxp_04aea8dc-4316-432f-a13a-d7d9b2efa3f2" + } + } + }, + "root": "/pool/ext/0b11e026-f265-49a0-935f-7b234c19c789/crypt/zone" + }, + { + "zone": { + "id": "31ed5a0c-7caf-4825-b730-85ee94fe27f1", + "underlay_address": "fd00:1122:3344:120::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::a]:32345", + "dataset": { + "pool_name": "oxp_86cd16cf-d00d-40bc-b14a-8220b1e11476" + } + } + }, + "root": "/pool/ext/04aea8dc-4316-432f-a13a-d7d9b2efa3f2/crypt/zone" + }, + { + "zone": { + "id": "7e5a3c39-152a-4270-b01e-9e144cca4aaa", + "underlay_address": "fd00:1122:3344:120::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::5]:32345", + "dataset": { + "pool_name": "oxp_1069bdee-fe5a-4164-a856-ff8ae56c07fb" + } + } + }, + "root": "/pool/ext/04aea8dc-4316-432f-a13a-d7d9b2efa3f2/crypt/zone" + }, + { + "zone": { + "id": "9a03a386-7304-4a86-bee8-153ef643195e", + "underlay_address": "fd00:1122:3344:120::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:120::7]:32345", + "dataset": { + "pool_name": "oxp_35db8700-d6a7-498c-9d2c-08eb9ab41b7c" + } + } 
+ }, + "root": "/pool/ext/068d2790-1044-41ed-97a5-b493490b14d1/crypt/zone" + }, + { + "zone": { + "id": "a800d0a7-1020-481c-8be8-ecfd28b7a2be", + "underlay_address": "fd00:1122:3344:120::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:120::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/c1f0a9e4-ea10-4fd9-8b6d-79a2bacfec5e/crypt/zone" + }, + { + "zone": { + "id": "be469efd-8e07-4b8e-bcee-6fd33373cdef", + "underlay_address": "fd00:1122:3344:3::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_ed8e5a26-5591-405a-b792-408f5b16e444" + }, + "http_address": "[fd00:1122:3344:3::1]:5353", + "dns_address": "[fd00:1122:3344:3::1]:53", + "gz_address": "fd00:1122:3344:3::2", + "gz_address_index": 2 + } + }, + "root": "/pool/ext/068d2790-1044-41ed-97a5-b493490b14d1/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled3.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled3.json new file mode 100644 index 0000000000..5da6d95389 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled3.json @@ -0,0 +1,178 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "19d091b8-e005-4ff4-97e1-026de95e3667", + "underlay_address": "fd00:1122:3344:10f::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::c]:32345", + "dataset": { + "pool_name": "oxp_11a63469-4f57-4976-8620-0055bf82dc97" + } + } + }, + "root": "/pool/ext/6a73a62c-c636-4557-af45-042cb287aee6/crypt/zone" + }, + { + "zone": { + "id": "57d77171-104e-4977-b2f9-9b529ee7f8a0", + "underlay_address": "fd00:1122:3344:10f::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::8]:32345", + "dataset": { + "pool_name": "oxp_7f3060af-058f-4f52-ab80-902bd13e7ef4" + } + } + }, + "root": "/pool/ext/7f3060af-058f-4f52-ab80-902bd13e7ef4/crypt/zone" + }, + { + "zone": { + "id": "b0371ccf-67da-4562-baf2-eaabe5243e9b", + "underlay_address": "fd00:1122:3344:10f::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::7]:32345", + "dataset": { + "pool_name": "oxp_58ae04cb-26ff-4e30-a20d-9f847bafba4d" + } + } + }, + "root": "/pool/ext/125ddcda-f94b-46bc-a10a-94e9acf40265/crypt/zone" + }, + { + "zone": { + "id": "ae3791ff-2657-4252-bd61-58ec5dc237cd", + "underlay_address": "fd00:1122:3344:10f::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::9]:32345", + "dataset": { + "pool_name": "oxp_125ddcda-f94b-46bc-a10a-94e9acf40265" + } + } + }, + "root": "/pool/ext/58ae04cb-26ff-4e30-a20d-9f847bafba4d/crypt/zone" + }, + { + "zone": { + "id": "73f865dc-5db7-48c6-9dc4-dff56dd8c045", + "underlay_address": "fd00:1122:3344:10f::3", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:10f::3]:17000" + } + }, + "root": "/pool/ext/11a63469-4f57-4976-8620-0055bf82dc97/crypt/zone" + }, + { + "zone": { + "id": "e5d0170a-0d60-4c51-8f72-4c301979690e", + "underlay_address": "fd00:1122:3344:10f::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::6]:32345", + "dataset": { + "pool_name": "oxp_efe4cbab-2a39-4d7d-ae6c-83eb3ab8d4b5" + } + } + }, + "root": 
"/pool/ext/6a73a62c-c636-4557-af45-042cb287aee6/crypt/zone" + }, + { + "zone": { + "id": "ea6894de-c575-43bc-86e9-65b8a58499ff", + "underlay_address": "fd00:1122:3344:10f::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::a]:32345", + "dataset": { + "pool_name": "oxp_a87dc882-8b88-4a99-9628-5db79072cffa" + } + } + }, + "root": "/pool/ext/11a63469-4f57-4976-8620-0055bf82dc97/crypt/zone" + }, + { + "zone": { + "id": "3081dc99-4fa9-4238-adfa-b9ca381c1f7b", + "underlay_address": "fd00:1122:3344:10f::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::b]:32345", + "dataset": { + "pool_name": "oxp_6a73a62c-c636-4557-af45-042cb287aee6" + } + } + }, + "root": "/pool/ext/a87dc882-8b88-4a99-9628-5db79072cffa/crypt/zone" + }, + { + "zone": { + "id": "b4a3d7c8-487d-4d76-ae4e-a6a51595a5a6", + "underlay_address": "fd00:1122:3344:10f::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::d]:32345", + "dataset": { + "pool_name": "oxp_a12f87ee-9918-4269-9de4-4bad4fb41caa" + } + } + }, + "root": "/pool/ext/a12f87ee-9918-4269-9de4-4bad4fb41caa/crypt/zone" + }, + { + "zone": { + "id": "5ebcee26-f76c-4206-8d81-584ac138d3b9", + "underlay_address": "fd00:1122:3344:10f::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::4]:32345", + "dataset": { + "pool_name": "oxp_27f1917e-fb69-496a-9d40-8ef0d0c0ee55" + } + } + }, + "root": "/pool/ext/58ae04cb-26ff-4e30-a20d-9f847bafba4d/crypt/zone" + }, + { + "zone": { + "id": "90b2bc57-3a2a-4117-bb6d-7eda7542329a", + "underlay_address": "fd00:1122:3344:10f::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10f::5]:32345", + "dataset": { + "pool_name": "oxp_a222e405-40f6-4fdd-9146-94f7d94ed08a" + } + } + }, + "root": "/pool/ext/a12f87ee-9918-4269-9de4-4bad4fb41caa/crypt/zone" + }, + { + "zone": { + "id": "0fb540af-58d3-4abc-bfad-e49765c2b1ee", + "underlay_address": "fd00:1122:3344:10f::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10f::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/58ae04cb-26ff-4e30-a20d-9f847bafba4d/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled30.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled30.json new file mode 100644 index 0000000000..c92a638b85 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled30.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "dda0f1c6-84a5-472c-b350-a799c8d3d0eb", + "underlay_address": "fd00:1122:3344:115::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::8]:32345", + "dataset": { + "pool_name": "oxp_028b6c9e-5a0e-43d2-a8ed-a5946cf62924" + } + } + }, + "root": "/pool/ext/b8d84b9c-a65e-4c86-8196-69da5317ae63/crypt/zone" + }, + { + "zone": { + "id": "157672f9-113f-48b7-9808-dff3c3e67dcd", + "underlay_address": "fd00:1122:3344:115::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::a]:32345", + "dataset": { + "pool_name": "oxp_4fdca201-b37e-4072-a1cc-3cb7705954eb" + } + } + }, + "root": "/pool/ext/b8d84b9c-a65e-4c86-8196-69da5317ae63/crypt/zone" + }, + { + "zone": { + "id": 
"5a7d4f67-a70f-4d8b-8d35-4dc600991fb5", + "underlay_address": "fd00:1122:3344:115::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::5]:32345", + "dataset": { + "pool_name": "oxp_11a991e5-19a9-48b0-8186-34249ef67957" + } + } + }, + "root": "/pool/ext/1e9c9764-aaa4-4681-b110-a937b4c52748/crypt/zone" + }, + { + "zone": { + "id": "c7036645-b680-4816-834f-8ae1af24c159", + "underlay_address": "fd00:1122:3344:115::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::b]:32345", + "dataset": { + "pool_name": "oxp_0780be56-c13d-4c6a-a1ac-37753a0da820" + } + } + }, + "root": "/pool/ext/80a8d756-ee22-4c88-8b5b-4a46f7eca249/crypt/zone" + }, + { + "zone": { + "id": "45e47e4b-708f-40b5-a8c8-fbfd73696d45", + "underlay_address": "fd00:1122:3344:115::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::7]:32345", + "dataset": { + "pool_name": "oxp_80a8d756-ee22-4c88-8b5b-4a46f7eca249" + } + } + }, + "root": "/pool/ext/4fdca201-b37e-4072-a1cc-3cb7705954eb/crypt/zone" + }, + { + "zone": { + "id": "e805b0c1-3f80-49da-8dc1-caaf843e5003", + "underlay_address": "fd00:1122:3344:115::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::c]:32345", + "dataset": { + "pool_name": "oxp_d54e1ed7-e589-4413-a487-6e9a257104e7" + } + } + }, + "root": "/pool/ext/d54e1ed7-e589-4413-a487-6e9a257104e7/crypt/zone" + }, + { + "zone": { + "id": "e47d3f81-3df6-4c35-bec6-41277bc74c07", + "underlay_address": "fd00:1122:3344:115::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::4]:32345", + "dataset": { + "pool_name": "oxp_b8d84b9c-a65e-4c86-8196-69da5317ae63" + } + } + }, + "root": "/pool/ext/772b3aaa-3501-4dc7-9b3d-048b8b1f7970/crypt/zone" + }, + { + "zone": { + "id": "2a796a69-b061-44c7-b2df-35bc611f10f5", + "underlay_address": "fd00:1122:3344:115::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::6]:32345", + "dataset": { + "pool_name": "oxp_73abe9e0-d38e-48fc-bdec-b094bfa5670d" + } + } + }, + "root": "/pool/ext/028b6c9e-5a0e-43d2-a8ed-a5946cf62924/crypt/zone" + }, + { + "zone": { + "id": "4e1d2af1-8ef4-4762-aa80-b08da08b45bb", + "underlay_address": "fd00:1122:3344:115::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::3]:32345", + "dataset": { + "pool_name": "oxp_772b3aaa-3501-4dc7-9b3d-048b8b1f7970" + } + } + }, + "root": "/pool/ext/d54e1ed7-e589-4413-a487-6e9a257104e7/crypt/zone" + }, + { + "zone": { + "id": "fb1b10d5-b7cb-416d-98fc-b5d3bc02d495", + "underlay_address": "fd00:1122:3344:115::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:115::9]:32345", + "dataset": { + "pool_name": "oxp_1e9c9764-aaa4-4681-b110-a937b4c52748" + } + } + }, + "root": "/pool/ext/b8d84b9c-a65e-4c86-8196-69da5317ae63/crypt/zone" + }, + { + "zone": { + "id": "5155463c-8a09-45a5-ad1b-817f2e93b284", + "underlay_address": "fd00:1122:3344:115::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:115::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/772b3aaa-3501-4dc7-9b3d-048b8b1f7970/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled31.json 
b/sled-agent/tests/output/new-zones-ledgers/rack3-sled31.json new file mode 100644 index 0000000000..5e38262740 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled31.json @@ -0,0 +1,181 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "a0eae689-8e6b-4297-bb3d-8b7ffc5c4a07", + "underlay_address": "fd00:1122:3344:102::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::c]:32345", + "dataset": { + "pool_name": "oxp_274cb567-fd74-4e00-b9c7-6ca367b3fda4" + } + } + }, + "root": "/pool/ext/1443b190-de16-42b0-b881-e87e875dd507/crypt/zone" + }, + { + "zone": { + "id": "9cea406d-451e-4328-9052-b58487f799a5", + "underlay_address": "fd00:1122:3344:102::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::b]:32345", + "dataset": { + "pool_name": "oxp_89c7f72e-632c-462b-a515-01cd80683711" + } + } + }, + "root": "/pool/ext/274cb567-fd74-4e00-b9c7-6ca367b3fda4/crypt/zone" + }, + { + "zone": { + "id": "9c7dad7e-7f60-4bf4-8efc-0883a17e7cf6", + "underlay_address": "fd00:1122:3344:102::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::6]:32345", + "dataset": { + "pool_name": "oxp_2c8e5637-b989-4b8f-82ac-ff2e9102b560" + } + } + }, + "root": "/pool/ext/1443b190-de16-42b0-b881-e87e875dd507/crypt/zone" + }, + { + "zone": { + "id": "73015cba-79c6-4a67-97d8-fa0819cbf750", + "underlay_address": "fd00:1122:3344:102::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::a]:32345", + "dataset": { + "pool_name": "oxp_fa62108e-f7bb-4f6d-86f3-8094a1ea8352" + } + } + }, + "root": "/pool/ext/2c8e5637-b989-4b8f-82ac-ff2e9102b560/crypt/zone" + }, + { + "zone": { + "id": "f9ca3097-072e-4e7f-9f50-eb7c7ae39b6f", + "underlay_address": "fd00:1122:3344:102::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::5]:32345", + "dataset": { + "pool_name": "oxp_42c6602c-2ccf-48ce-8344-693c832fd693" + } + } + }, + "root": "/pool/ext/2c8e5637-b989-4b8f-82ac-ff2e9102b560/crypt/zone" + }, + { + "zone": { + "id": "e7855e05-a125-4a80-ac2c-8a2db96e1bf8", + "underlay_address": "fd00:1122:3344:102::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::7]:32345", + "dataset": { + "pool_name": "oxp_1f72afd3-d2aa-46a8-b81a-54dbcc2f6317" + } + } + }, + "root": "/pool/ext/42c6602c-2ccf-48ce-8344-693c832fd693/crypt/zone" + }, + { + "zone": { + "id": "e5de9bc9-e996-4fea-8318-ad7a8a6be4a3", + "underlay_address": "fd00:1122:3344:102::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::4]:32345", + "dataset": { + "pool_name": "oxp_1443b190-de16-42b0-b881-e87e875dd507" + } + } + }, + "root": "/pool/ext/89c7f72e-632c-462b-a515-01cd80683711/crypt/zone" + }, + { + "zone": { + "id": "cd0d0aac-44ff-4566-9260-a64ae6cecef4", + "underlay_address": "fd00:1122:3344:102::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::8]:32345", + "dataset": { + "pool_name": "oxp_92c0d1f6-cb4d-4ddb-b5ba-979fb3491812" + } + } + }, + "root": "/pool/ext/89c7f72e-632c-462b-a515-01cd80683711/crypt/zone" + }, + { + "zone": { + "id": "a8230592-0e7a-46c8-a653-7587a27f05bf", + "underlay_address": "fd00:1122:3344:102::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::9]:32345", + "dataset": { + "pool_name": "oxp_1b7873de-99fd-454f-b576-bff695524133" + } + } + }, + "root": "/pool/ext/92c0d1f6-cb4d-4ddb-b5ba-979fb3491812/crypt/zone" + }, + { + "zone": { + "id": 
"c19ffbb1-4dc1-4825-a3cf-080e9b543b16", + "underlay_address": "fd00:1122:3344:102::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::d]:32345", + "dataset": { + "pool_name": "oxp_67823df7-511c-4984-b98c-7a8f5c40c22d" + } + } + }, + "root": "/pool/ext/1443b190-de16-42b0-b881-e87e875dd507/crypt/zone" + }, + { + "zone": { + "id": "ff30fe7c-51f3-43b9-a788-d8f94a7bb028", + "underlay_address": "fd00:1122:3344:102::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:102::3]:32221", + "dataset": { + "pool_name": "oxp_1443b190-de16-42b0-b881-e87e875dd507" + } + } + }, + "root": "/pool/ext/fa62108e-f7bb-4f6d-86f3-8094a1ea8352/crypt/zone" + }, + { + "zone": { + "id": "16b50c55-8117-4efd-aabf-0273677b89d5", + "underlay_address": "fd00:1122:3344:102::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:102::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/fa62108e-f7bb-4f6d-86f3-8094a1ea8352/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled4.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled4.json new file mode 100644 index 0000000000..7c1d269d61 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled4.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "22452953-ee80-4659-a555-8e027bf205b0", + "underlay_address": "fd00:1122:3344:10c::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::4]:32345", + "dataset": { + "pool_name": "oxp_92ba1667-a6f7-4913-9b00-14825384c7bf" + } + } + }, + "root": "/pool/ext/ab62b941-5f84-42c7-929d-295b20efffe7/crypt/zone" + }, + { + "zone": { + "id": "9a5a2fcf-44a0-4468-979a-a71686cef627", + "underlay_address": "fd00:1122:3344:10c::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::3]:32345", + "dataset": { + "pool_name": "oxp_dbfdc981-1b81-4d7d-9449-9530890b199a" + } + } + }, + "root": "/pool/ext/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone" + }, + { + "zone": { + "id": "a014f12e-2636-4258-af76-e01d9b8d1c1f", + "underlay_address": "fd00:1122:3344:10c::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::b]:32345", + "dataset": { + "pool_name": "oxp_ab62b941-5f84-42c7-929d-295b20efffe7" + } + } + }, + "root": "/pool/ext/a624a843-1c4e-41c3-a1d2-4be7a6c57e9b/crypt/zone" + }, + { + "zone": { + "id": "431768b8-26ba-4ab4-b616-9e183bb79b8b", + "underlay_address": "fd00:1122:3344:10c::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::7]:32345", + "dataset": { + "pool_name": "oxp_7c121177-3210-4457-9b42-3657add6e166" + } + } + }, + "root": "/pool/ext/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone" + }, + { + "zone": { + "id": "22992c56-bd5a-4d0f-86c5-d6f8e87b7bbb", + "underlay_address": "fd00:1122:3344:10c::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::9]:32345", + "dataset": { + "pool_name": "oxp_842bdd28-196e-4b18-83db-68bd81176a44" + } + } + }, + "root": "/pool/ext/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone" + }, + { + "zone": { + "id": "de376149-aa45-4660-9ae6-15e8ba4a4233", + "underlay_address": "fd00:1122:3344:10c::5", + 
"zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::5]:32345", + "dataset": { + "pool_name": "oxp_25856a84-6707-4b94-81d1-b43d5bc990d7" + } + } + }, + "root": "/pool/ext/7c121177-3210-4457-9b42-3657add6e166/crypt/zone" + }, + { + "zone": { + "id": "ceeba69d-8c0a-47df-a37b-7f1b90f23016", + "underlay_address": "fd00:1122:3344:10c::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::a]:32345", + "dataset": { + "pool_name": "oxp_a624a843-1c4e-41c3-a1d2-4be7a6c57e9b" + } + } + }, + "root": "/pool/ext/74ac4da9-cdae-4c08-8431-11211184aa09/crypt/zone" + }, + { + "zone": { + "id": "65293ce4-2e63-4336-9207-3c61f58667f9", + "underlay_address": "fd00:1122:3344:10c::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::c]:32345", + "dataset": { + "pool_name": "oxp_74ac4da9-cdae-4c08-8431-11211184aa09" + } + } + }, + "root": "/pool/ext/842bdd28-196e-4b18-83db-68bd81176a44/crypt/zone" + }, + { + "zone": { + "id": "e8f55a5d-65f9-436c-bc25-1d1a7070e876", + "underlay_address": "fd00:1122:3344:10c::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::6]:32345", + "dataset": { + "pool_name": "oxp_9bfe385c-16dd-4209-bc0b-f28ae75d58e3" + } + } + }, + "root": "/pool/ext/92ba1667-a6f7-4913-9b00-14825384c7bf/crypt/zone" + }, + { + "zone": { + "id": "2dfbd4c6-afbf-4c8c-bf40-764f02727852", + "underlay_address": "fd00:1122:3344:10c::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10c::8]:32345", + "dataset": { + "pool_name": "oxp_55eb093d-6b6f-418c-9767-09afe4c51fff" + } + } + }, + "root": "/pool/ext/dbfdc981-1b81-4d7d-9449-9530890b199a/crypt/zone" + }, + { + "zone": { + "id": "8c73baf7-1a58-4e2c-b4d1-966c89a18d03", + "underlay_address": "fd00:1122:3344:10c::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10c::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/842bdd28-196e-4b18-83db-68bd81176a44/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled5.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled5.json new file mode 100644 index 0000000000..acbfa17eda --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled5.json @@ -0,0 +1,178 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "2f488e7b-fd93-48a6-8b2b-61f6e8336268", + "underlay_address": "fd00:1122:3344:101::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::b]:32345", + "dataset": { + "pool_name": "oxp_5840a3b7-f765-45d3-8a41-7f543f936bee" + } + } + }, + "root": "/pool/ext/dd084b76-1130-4ad3-9196-6b02be607fe9/crypt/zone" + }, + { + "zone": { + "id": "1ed5fd3f-933a-4921-a91f-5c286823f8d4", + "underlay_address": "fd00:1122:3344:101::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::a]:32345", + "dataset": { + "pool_name": "oxp_c1e807e7-b64a-4dbd-b845-ffed0b9a54f1" + } + } + }, + "root": "/pool/ext/be06ea9c-df86-4fec-b5dd-8809710893af/crypt/zone" + }, + { + "zone": { + "id": "0f8f1013-465d-4b49-b55d-f0b9bf6f789a", + "underlay_address": "fd00:1122:3344:101::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::6]:32345", + "dataset": { + 
"pool_name": "oxp_4dfa7003-0305-47f5-b23d-88a228c1e12e" + } + } + }, + "root": "/pool/ext/be06ea9c-df86-4fec-b5dd-8809710893af/crypt/zone" + }, + { + "zone": { + "id": "2e4ef017-6c62-40bc-bab5-f2e01addad22", + "underlay_address": "fd00:1122:3344:101::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::7]:32345", + "dataset": { + "pool_name": "oxp_d94e9c58-e6d1-444b-b7d8-19ac17dea042" + } + } + }, + "root": "/pool/ext/c1e807e7-b64a-4dbd-b845-ffed0b9a54f1/crypt/zone" + }, + { + "zone": { + "id": "6a0baf13-a80b-4778-a0ab-a69cd851de2d", + "underlay_address": "fd00:1122:3344:101::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::9]:32345", + "dataset": { + "pool_name": "oxp_be06ea9c-df86-4fec-b5dd-8809710893af" + } + } + }, + "root": "/pool/ext/a9d419d4-5915-4a40-baa3-3512785de034/crypt/zone" + }, + { + "zone": { + "id": "391ec257-fd47-4cc8-9bfa-49a0747a9a67", + "underlay_address": "fd00:1122:3344:101::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::8]:32345", + "dataset": { + "pool_name": "oxp_a9d419d4-5915-4a40-baa3-3512785de034" + } + } + }, + "root": "/pool/ext/709d5d04-5dff-4558-8b5d-fbc2a7d83036/crypt/zone" + }, + { + "zone": { + "id": "fd8e615a-f170-4da9-b8d0-2a5a123d8682", + "underlay_address": "fd00:1122:3344:101::3", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:101::3]:17000" + } + }, + "root": "/pool/ext/dd084b76-1130-4ad3-9196-6b02be607fe9/crypt/zone" + }, + { + "zone": { + "id": "f8a793f4-cd08-49ec-8fee-6bcd37092fdc", + "underlay_address": "fd00:1122:3344:101::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::c]:32345", + "dataset": { + "pool_name": "oxp_709d5d04-5dff-4558-8b5d-fbc2a7d83036" + } + } + }, + "root": "/pool/ext/d94e9c58-e6d1-444b-b7d8-19ac17dea042/crypt/zone" + }, + { + "zone": { + "id": "c67d44be-d6b8-4a08-a7e0-3ab300749ad6", + "underlay_address": "fd00:1122:3344:101::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::4]:32345", + "dataset": { + "pool_name": "oxp_231cd696-2839-4a9a-ae42-6d875a98a797" + } + } + }, + "root": "/pool/ext/709d5d04-5dff-4558-8b5d-fbc2a7d83036/crypt/zone" + }, + { + "zone": { + "id": "e91b4957-8165-451d-9fa5-090c3a39f199", + "underlay_address": "fd00:1122:3344:101::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::d]:32345", + "dataset": { + "pool_name": "oxp_dd084b76-1130-4ad3-9196-6b02be607fe9" + } + } + }, + "root": "/pool/ext/5840a3b7-f765-45d3-8a41-7f543f936bee/crypt/zone" + }, + { + "zone": { + "id": "5e737b6e-d33d-4a2c-b8c0-3cad9d05a68f", + "underlay_address": "fd00:1122:3344:101::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:101::5]:32345", + "dataset": { + "pool_name": "oxp_8fa4f837-c6f3-4c65-88d4-21eb3cd7ffee" + } + } + }, + "root": "/pool/ext/dd084b76-1130-4ad3-9196-6b02be607fe9/crypt/zone" + }, + { + "zone": { + "id": "7e6b7816-b1a6-40f3-894a-a5d5c0571dbb", + "underlay_address": "fd00:1122:3344:101::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:101::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/be06ea9c-df86-4fec-b5dd-8809710893af/crypt/zone" + } + ] +} \ No newline at end of file diff --git 
a/sled-agent/tests/output/new-zones-ledgers/rack3-sled6.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled6.json new file mode 100644 index 0000000000..ce4b6f03cd --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled6.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "eafffae7-69fd-49e1-9541-7cf237ab12b3", + "underlay_address": "fd00:1122:3344:110::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::3]:32345", + "dataset": { + "pool_name": "oxp_929404cd-2522-4440-b21c-91d466a9a7e0" + } + } + }, + "root": "/pool/ext/aff390ed-8d70-49fa-9000-5420b54ab118/crypt/zone" + }, + { + "zone": { + "id": "f4bccf15-d69f-402d-9bd2-7959a4cb2823", + "underlay_address": "fd00:1122:3344:110::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::9]:32345", + "dataset": { + "pool_name": "oxp_f80f96be-a3d7-490a-96a7-faf7da80a579" + } + } + }, + "root": "/pool/ext/6bcd54c8-d4a8-429d-8f17-cf02615eb063/crypt/zone" + }, + { + "zone": { + "id": "82e51c9d-c187-4baa-8307-e46eeafc5ff2", + "underlay_address": "fd00:1122:3344:110::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::5]:32345", + "dataset": { + "pool_name": "oxp_37d86199-6834-49d9-888a-88ff6f281b29" + } + } + }, + "root": "/pool/ext/d2e27e2a-2deb-42ae-84a7-c2d06f3aeb4f/crypt/zone" + }, + { + "zone": { + "id": "cf667caf-304c-40c4-acce-f0eb05d011ef", + "underlay_address": "fd00:1122:3344:110::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::8]:32345", + "dataset": { + "pool_name": "oxp_625c0110-644e-4d63-8321-b85ab5642260" + } + } + }, + "root": "/pool/ext/d2e27e2a-2deb-42ae-84a7-c2d06f3aeb4f/crypt/zone" + }, + { + "zone": { + "id": "14e60912-108e-4dd3-984e-2332a183b346", + "underlay_address": "fd00:1122:3344:110::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::b]:32345", + "dataset": { + "pool_name": "oxp_fa6470f5-0a4c-4fef-b0b1-57c8749c6cca" + } + } + }, + "root": "/pool/ext/6c5ab641-3bd4-4d8c-96f4-4f56c1045142/crypt/zone" + }, + { + "zone": { + "id": "1aacf923-c96f-4bab-acb0-63f28e86eef6", + "underlay_address": "fd00:1122:3344:110::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::c]:32345", + "dataset": { + "pool_name": "oxp_21b0f3ed-d27f-4996-968b-bf2b494d9308" + } + } + }, + "root": "/pool/ext/625c0110-644e-4d63-8321-b85ab5642260/crypt/zone" + }, + { + "zone": { + "id": "b9db0845-04d3-4dc1-84ba-224749562a6c", + "underlay_address": "fd00:1122:3344:110::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::6]:32345", + "dataset": { + "pool_name": "oxp_d2e27e2a-2deb-42ae-84a7-c2d06f3aeb4f" + } + } + }, + "root": "/pool/ext/aff390ed-8d70-49fa-9000-5420b54ab118/crypt/zone" + }, + { + "zone": { + "id": "38b51865-ee80-4e1b-a40b-3452951f9022", + "underlay_address": "fd00:1122:3344:110::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::7]:32345", + "dataset": { + "pool_name": "oxp_6bcd54c8-d4a8-429d-8f17-cf02615eb063" + } + } + }, + "root": "/pool/ext/37d86199-6834-49d9-888a-88ff6f281b29/crypt/zone" + }, + { + "zone": { + "id": "4bc441f6-f7e5-4d68-8751-53ef1e251c47", + "underlay_address": "fd00:1122:3344:110::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::a]:32345", + "dataset": { + "pool_name": "oxp_6c5ab641-3bd4-4d8c-96f4-4f56c1045142" + } + } + }, + "root": "/pool/ext/21b0f3ed-d27f-4996-968b-bf2b494d9308/crypt/zone" + }, + { 
+ "zone": { + "id": "d2c20cf8-ed4c-4815-add9-45996364f721", + "underlay_address": "fd00:1122:3344:110::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:110::4]:32345", + "dataset": { + "pool_name": "oxp_aff390ed-8d70-49fa-9000-5420b54ab118" + } + } + }, + "root": "/pool/ext/6c5ab641-3bd4-4d8c-96f4-4f56c1045142/crypt/zone" + }, + { + "zone": { + "id": "1bb548cb-889a-411e-8c67-d1b785225180", + "underlay_address": "fd00:1122:3344:110::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:110::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/6bcd54c8-d4a8-429d-8f17-cf02615eb063/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled7.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled7.json new file mode 100644 index 0000000000..62653d0767 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled7.json @@ -0,0 +1,167 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "2eb74fa3-71ec-484c-8ffa-3daeab0e4c78", + "underlay_address": "fd00:1122:3344:11d::3", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::3]:32345", + "dataset": { + "pool_name": "oxp_c6b63fea-e3e2-4806-b8dc-bdfe7b5c3d89" + } + } + }, + "root": "/pool/ext/9f20cbae-7a63-4c31-9386-2ac3cbe12030/crypt/zone" + }, + { + "zone": { + "id": "9f92bfcf-7435-44a6-8e77-0597f93cd0b4", + "underlay_address": "fd00:1122:3344:11d::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::7]:32345", + "dataset": { + "pool_name": "oxp_9fa336f1-2b69-4ebf-9553-e3bab7e3e6ef" + } + } + }, + "root": "/pool/ext/e05a6264-63f2-4961-bc14-57b4f65614c0/crypt/zone" + }, + { + "zone": { + "id": "1bf9aed4-9fd3-4d87-b8e7-7f066d25ec1d", + "underlay_address": "fd00:1122:3344:11d::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::b]:32345", + "dataset": { + "pool_name": "oxp_a5a52f47-9c9a-4519-83dc-abc56619495d" + } + } + }, + "root": "/pool/ext/cbcad26e-5e52-41b7-9875-1a84d30d8a15/crypt/zone" + }, + { + "zone": { + "id": "2a722aa7-cd8a-445d-83fe-57fc9b9a8249", + "underlay_address": "fd00:1122:3344:11d::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::8]:32345", + "dataset": { + "pool_name": "oxp_1f4b71eb-505f-4706-912c-b13dd3f2eafb" + } + } + }, + "root": "/pool/ext/a5a52f47-9c9a-4519-83dc-abc56619495d/crypt/zone" + }, + { + "zone": { + "id": "76af5b23-d833-435c-b848-2a09d9fad9a1", + "underlay_address": "fd00:1122:3344:11d::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::c]:32345", + "dataset": { + "pool_name": "oxp_cbcad26e-5e52-41b7-9875-1a84d30d8a15" + } + } + }, + "root": "/pool/ext/9f20cbae-7a63-4c31-9386-2ac3cbe12030/crypt/zone" + }, + { + "zone": { + "id": "3a412bf4-a385-4e66-9ada-a87f6536d6ca", + "underlay_address": "fd00:1122:3344:11d::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::4]:32345", + "dataset": { + "pool_name": "oxp_e05a6264-63f2-4961-bc14-57b4f65614c0" + } + } + }, + "root": "/pool/ext/e05a6264-63f2-4961-bc14-57b4f65614c0/crypt/zone" + }, + { + "zone": { + "id": "99a25fa7-8231-4a46-a6ec-ffc5281db1f8", + "underlay_address": 
"fd00:1122:3344:11d::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::5]:32345", + "dataset": { + "pool_name": "oxp_722494ab-9a2b-481b-ac11-292fded682a5" + } + } + }, + "root": "/pool/ext/e05a6264-63f2-4961-bc14-57b4f65614c0/crypt/zone" + }, + { + "zone": { + "id": "06c7ddc8-9b3e-48ef-9874-0c40874e9877", + "underlay_address": "fd00:1122:3344:11d::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::a]:32345", + "dataset": { + "pool_name": "oxp_8c3972d1-5b17-4479-88cc-1c33e4344160" + } + } + }, + "root": "/pool/ext/8c3972d1-5b17-4479-88cc-1c33e4344160/crypt/zone" + }, + { + "zone": { + "id": "1212b2dc-157d-4bd3-94af-fb5db1d91f24", + "underlay_address": "fd00:1122:3344:11d::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::9]:32345", + "dataset": { + "pool_name": "oxp_9f20cbae-7a63-4c31-9386-2ac3cbe12030" + } + } + }, + "root": "/pool/ext/977aa6c3-2026-4178-9948-e09f78008575/crypt/zone" + }, + { + "zone": { + "id": "b1fb5f2e-b20d-4f4c-9f6f-bbeb1a98dd50", + "underlay_address": "fd00:1122:3344:11d::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:11d::6]:32345", + "dataset": { + "pool_name": "oxp_977aa6c3-2026-4178-9948-e09f78008575" + } + } + }, + "root": "/pool/ext/722494ab-9a2b-481b-ac11-292fded682a5/crypt/zone" + }, + { + "zone": { + "id": "e68dde0f-0647-46db-ae1c-711835c13e25", + "underlay_address": "fd00:1122:3344:11d::d", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:11d::d]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/1f4b71eb-505f-4706-912c-b13dd3f2eafb/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled8.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled8.json new file mode 100644 index 0000000000..b848826231 --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled8.json @@ -0,0 +1,198 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "85c18b7c-a100-458c-b18d-ecfdacaefac4", + "underlay_address": "fd00:1122:3344:10e::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::5]:32345", + "dataset": { + "pool_name": "oxp_07b266bc-86c3-4a76-9522-8b34ba1ae78c" + } + } + }, + "root": "/pool/ext/5b88e44e-f886-4de8-8a6b-48ea5ed9d70b/crypt/zone" + }, + { + "zone": { + "id": "db303465-7879-4d86-8da8-a0c7162e5184", + "underlay_address": "fd00:1122:3344:10e::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::4]:32345", + "dataset": { + "pool_name": "oxp_e9488a32-880d-44a2-8948-db0b7e3a35b5" + } + } + }, + "root": "/pool/ext/8d798756-7200-4db4-9faf-f41b75106a63/crypt/zone" + }, + { + "zone": { + "id": "c44ce6be-512d-4104-9260-a5b8fe373937", + "underlay_address": "fd00:1122:3344:10e::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::9]:32345", + "dataset": { + "pool_name": "oxp_025dfc06-5aeb-407f-adc8-ba18dc9bba35" + } + } + }, + "root": "/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone" + }, + { + "zone": { + "id": "1cfdb5b6-e568-436a-a85f-7fecf1b8eef2", + "underlay_address": "fd00:1122:3344:10e::3", + "zone_type": { + "type": "nexus", + "internal_address": 
"[fd00:1122:3344:10e::3]:12221", + "external_ip": "45.154.216.36", + "nic": { + "id": "569754a2-a5e0-4aa8-90a7-2fa65f43b667", + "kind": { + "type": "service", + "id": "1cfdb5b6-e568-436a-a85f-7fecf1b8eef2" + }, + "name": "nexus-1cfdb5b6-e568-436a-a85f-7fecf1b8eef2", + "ip": "172.30.2.6", + "mac": "A8:40:25:FF:EC:6B", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "8.8.8.8" + ] + } + }, + "root": "/pool/ext/025dfc06-5aeb-407f-adc8-ba18dc9bba35/crypt/zone" + }, + { + "zone": { + "id": "44a68792-ca14-442e-b7a9-11970d50ba0e", + "underlay_address": "fd00:1122:3344:10e::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::a]:32345", + "dataset": { + "pool_name": "oxp_2a492098-7df3-4409-9466-561edb7aa99b" + } + } + }, + "root": "/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone" + }, + { + "zone": { + "id": "514cf0ca-6d23-434e-9785-446b83b2f029", + "underlay_address": "fd00:1122:3344:10e::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::7]:32345", + "dataset": { + "pool_name": "oxp_5b88e44e-f886-4de8-8a6b-48ea5ed9d70b" + } + } + }, + "root": "/pool/ext/5b88e44e-f886-4de8-8a6b-48ea5ed9d70b/crypt/zone" + }, + { + "zone": { + "id": "bc6d8347-8f64-4031-912c-932349df07fe", + "underlay_address": "fd00:1122:3344:10e::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::6]:32345", + "dataset": { + "pool_name": "oxp_1544ce68-3544-4cba-b3b6-1927d08b78a5" + } + } + }, + "root": "/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone" + }, + { + "zone": { + "id": "1ab0a4f5-99ad-4341-8c89-7fd03e5ccb08", + "underlay_address": "fd00:1122:3344:10e::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::b]:32345", + "dataset": { + "pool_name": "oxp_033eb462-968f-42ce-9c29-377bd40a3014" + } + } + }, + "root": "/pool/ext/9e1a0803-7453-4eac-91c9-d7891ecd634f/crypt/zone" + }, + { + "zone": { + "id": "d6f2520b-3d04-44d9-bd46-6ffccfcb46d2", + "underlay_address": "fd00:1122:3344:10e::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::8]:32345", + "dataset": { + "pool_name": "oxp_36e8d29c-1e88-4c2b-8f59-f312201067c3" + } + } + }, + "root": "/pool/ext/1544ce68-3544-4cba-b3b6-1927d08b78a5/crypt/zone" + }, + { + "zone": { + "id": "d6da9d13-bfcf-469d-a99e-faeb5e30be32", + "underlay_address": "fd00:1122:3344:10e::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::c]:32345", + "dataset": { + "pool_name": "oxp_9e1a0803-7453-4eac-91c9-d7891ecd634f" + } + } + }, + "root": "/pool/ext/8d798756-7200-4db4-9faf-f41b75106a63/crypt/zone" + }, + { + "zone": { + "id": "a1dc59c2-5883-4fb8-83be-ac2d95d255d1", + "underlay_address": "fd00:1122:3344:10e::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10e::d]:32345", + "dataset": { + "pool_name": "oxp_8d798756-7200-4db4-9faf-f41b75106a63" + } + } + }, + "root": "/pool/ext/36e8d29c-1e88-4c2b-8f59-f312201067c3/crypt/zone" + }, + { + "zone": { + "id": "48f25dba-7392-44ce-9bb0-28489ebc44bc", + "underlay_address": "fd00:1122:3344:10e::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10e::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } 
+ }, + "root": "/pool/ext/5b88e44e-f886-4de8-8a6b-48ea5ed9d70b/crypt/zone" + } + ] +} \ No newline at end of file diff --git a/sled-agent/tests/output/new-zones-ledgers/rack3-sled9.json b/sled-agent/tests/output/new-zones-ledgers/rack3-sled9.json new file mode 100644 index 0000000000..62d45a2f5a --- /dev/null +++ b/sled-agent/tests/output/new-zones-ledgers/rack3-sled9.json @@ -0,0 +1,178 @@ +{ + "omicron_generation": 2, + "ledger_generation": 4, + "zones": [ + { + "zone": { + "id": "b452e5e1-ab4c-4994-9679-ef21b3b4fee9", + "underlay_address": "fd00:1122:3344:10b::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::6]:32345", + "dataset": { + "pool_name": "oxp_d63a297d-ae6a-4072-9dca-dda404044989" + } + } + }, + "root": "/pool/ext/7c204111-31df-4c32-9a3e-780411f700fd/crypt/zone" + }, + { + "zone": { + "id": "e9826cdc-6d3a-4eff-b1b5-ec4364ebe6b9", + "underlay_address": "fd00:1122:3344:10b::3", + "zone_type": { + "type": "oximeter", + "address": "[fd00:1122:3344:10b::3]:12223" + } + }, + "root": "/pool/ext/7c204111-31df-4c32-9a3e-780411f700fd/crypt/zone" + }, + { + "zone": { + "id": "b0cde4a8-f27c-46e8-8355-756be9045afc", + "underlay_address": "fd00:1122:3344:10b::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::b]:32345", + "dataset": { + "pool_name": "oxp_07c1a8e7-51f5-4f12-a43d-734719fef92b" + } + } + }, + "root": "/pool/ext/1f6adf64-c9b9-4ed7-b3e2-37fb25624646/crypt/zone" + }, + { + "zone": { + "id": "e2f70cf6-e285-4212-9b01-77ebf2ca9219", + "underlay_address": "fd00:1122:3344:10b::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::d]:32345", + "dataset": { + "pool_name": "oxp_a809f28a-7f25-4362-bc56-0cbdd72af2cb" + } + } + }, + "root": "/pool/ext/92a1bd39-6e8a-4226-b9d0-e3e8a9b8504f/crypt/zone" + }, + { + "zone": { + "id": "b0949c9d-4aa1-4bc4-9cb3-5875b9166885", + "underlay_address": "fd00:1122:3344:10b::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::a]:32345", + "dataset": { + "pool_name": "oxp_af0cc12b-43c5-473a-89a7-28351fbbb430" + } + } + }, + "root": "/pool/ext/cf1594ed-7c0c-467c-b0af-a689dcb427a3/crypt/zone" + }, + { + "zone": { + "id": "7cea4d59-a8ca-4826-901d-8d5bd935dc09", + "underlay_address": "fd00:1122:3344:10b::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::9]:32345", + "dataset": { + "pool_name": "oxp_d75dae09-4992-4a61-ab7d-5ae1d2b068ba" + } + } + }, + "root": "/pool/ext/a809f28a-7f25-4362-bc56-0cbdd72af2cb/crypt/zone" + }, + { + "zone": { + "id": "08adaeee-c3b5-4cd8-8fbd-ac371b3101c9", + "underlay_address": "fd00:1122:3344:10b::4", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::4]:32345", + "dataset": { + "pool_name": "oxp_d9f23187-fbf9-4ea5-a103-bc112263a9a7" + } + } + }, + "root": "/pool/ext/7c204111-31df-4c32-9a3e-780411f700fd/crypt/zone" + }, + { + "zone": { + "id": "3da1ade5-3fcb-4e64-aa08-81ee8a9ef723", + "underlay_address": "fd00:1122:3344:10b::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::8]:32345", + "dataset": { + "pool_name": "oxp_1f6adf64-c9b9-4ed7-b3e2-37fb25624646" + } + } + }, + "root": "/pool/ext/07c1a8e7-51f5-4f12-a43d-734719fef92b/crypt/zone" + }, + { + "zone": { + "id": "816f26a7-4c28-4a39-b9ad-a036678520ab", + "underlay_address": "fd00:1122:3344:10b::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::7]:32345", + "dataset": { + "pool_name": "oxp_92a1bd39-6e8a-4226-b9d0-e3e8a9b8504f" + } + } + }, + "root": 
"/pool/ext/d9f23187-fbf9-4ea5-a103-bc112263a9a7/crypt/zone" + }, + { + "zone": { + "id": "839f9839-409f-45d3-b8a6-7085507b90f6", + "underlay_address": "fd00:1122:3344:10b::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::c]:32345", + "dataset": { + "pool_name": "oxp_7c204111-31df-4c32-9a3e-780411f700fd" + } + } + }, + "root": "/pool/ext/af0cc12b-43c5-473a-89a7-28351fbbb430/crypt/zone" + }, + { + "zone": { + "id": "c717c81f-a228-4412-a34e-90f8c491d847", + "underlay_address": "fd00:1122:3344:10b::5", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:10b::5]:32345", + "dataset": { + "pool_name": "oxp_cf1594ed-7c0c-467c-b0af-a689dcb427a3" + } + } + }, + "root": "/pool/ext/d63a297d-ae6a-4072-9dca-dda404044989/crypt/zone" + }, + { + "zone": { + "id": "e1fa2023-6c86-40a4-ae59-a0de112cf7a9", + "underlay_address": "fd00:1122:3344:10b::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:10b::e]:123", + "ntp_servers": [ + "440dd615-e11f-4a5d-aeb4-dcf88bb314de.host.control-plane.oxide.internal", + "cb901d3e-8811-4c4c-a274-a44130501ecf.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + "root": "/pool/ext/d9f23187-fbf9-4ea5-a103-bc112263a9a7/crypt/zone" + } + ] +} \ No newline at end of file From 3349a199f368fe83e92c1cb9555bb65baaf7a73d Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Thu, 30 Nov 2023 17:42:26 -0800 Subject: [PATCH 039/186] serde default for autoneg param (#4591) --- common/src/api/internal/shared.rs | 1 + openapi/bootstrap-agent.json | 2 +- openapi/nexus-internal.json | 2 +- openapi/sled-agent.json | 2 +- openapi/wicketd.json | 2 +- schema/rss-sled-plan.json | 2 +- 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 15ab4c66ce..c8d8b1c786 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -141,6 +141,7 @@ pub struct PortConfigV1 { /// BGP peers on this port pub bgp_peers: Vec, /// Whether or not to set autonegotiation + #[serde(default)] pub autoneg: bool, } diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index efd9c05fa9..0c5bd15050 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -512,6 +512,7 @@ }, "autoneg": { "description": "Whether or not to set autonegotiation", + "default": false, "type": "boolean" }, "bgp_peers": { @@ -559,7 +560,6 @@ }, "required": [ "addresses", - "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 82c799b78d..7785d232d9 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4242,6 +4242,7 @@ }, "autoneg": { "description": "Whether or not to set autonegotiation", + "default": false, "type": "boolean" }, "bgp_peers": { @@ -4289,7 +4290,6 @@ }, "required": [ "addresses", - "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 6a0d692e99..9951392e98 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5339,6 +5339,7 @@ }, "autoneg": { "description": "Whether or not to set autonegotiation", + "default": false, "type": "boolean" }, "bgp_peers": { @@ -5386,7 +5387,6 @@ }, "required": [ "addresses", - "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 32e3b70de2..804b2029c6 100644 
--- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1547,6 +1547,7 @@ }, "autoneg": { "description": "Whether or not to set autonegotiation", + "default": false, "type": "boolean" }, "bgp_peers": { @@ -1594,7 +1595,6 @@ }, "required": [ "addresses", - "autoneg", "bgp_peers", "port", "routes", diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 5086c38a9c..2ef7a7b58a 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -366,7 +366,6 @@ "type": "object", "required": [ "addresses", - "autoneg", "bgp_peers", "port", "routes", @@ -384,6 +383,7 @@ }, "autoneg": { "description": "Whether or not to set autonegotiation", + "default": false, "type": "boolean" }, "bgp_peers": { From 92aed1a25a57d8483fc212ba245d871514aeadee Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 05:17:22 +0000 Subject: [PATCH 040/186] Update taiki-e/install-action digest to 21526ba (#4593) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`6b385b7` -> `21526ba`](https://togithub.com/taiki-e/install-action/compare/6b385b7...21526ba) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index afc56f40ca..b5a7504066 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@6b385b7509c65e9d1b7d6b72244f7e275a7f5cef # v2 + uses: taiki-e/install-action@21526ba3bb38834e625c185ae4f2f942f1fb8f27 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From ac94fc0e8f53c44f7655994068833d361b81c004 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Fri, 1 Dec 2023 14:09:41 -0800 Subject: [PATCH 041/186] Update Cargo.toml to match package-manifest for crucible (#4597) Co-authored-by: Alan Hanson --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 12 ++++++------ sled-agent/src/sim/http_entrypoints_pantry.rs | 7 ++++--- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b730cbda97..06c1c2b5b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -458,7 +458,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" +source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" dependencies = [ "bhyve_api_sys", "libc", @@ -468,7 +468,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" +source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" dependencies = [ "libc", "strum", @@ -1281,7 +1281,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" +source = "git+https://github.com/oxidecomputer/crucible?rev=945f040d259ca8013d3fb26f510453da7cd7b1a6#945f040d259ca8013d3fb26f510453da7cd7b1a6" dependencies = [ "anyhow", "chrono", @@ -1297,7 +1297,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" +source = "git+https://github.com/oxidecomputer/crucible?rev=945f040d259ca8013d3fb26f510453da7cd7b1a6#945f040d259ca8013d3fb26f510453da7cd7b1a6" dependencies = [ "anyhow", "chrono", @@ -1314,7 +1314,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" +source = "git+https://github.com/oxidecomputer/crucible?rev=945f040d259ca8013d3fb26f510453da7cd7b1a6#945f040d259ca8013d3fb26f510453da7cd7b1a6" dependencies = [ "crucible-workspace-hack", "libc", @@ -6112,7 +6112,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" +source = 
"git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" dependencies = [ "async-trait", "base64 0.21.5", @@ -6133,7 +6133,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" +source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" dependencies = [ "anyhow", "atty", @@ -6163,7 +6163,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" +source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index 694cd2c8dc..6d47909399 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -171,9 +171,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "945f040d259ca8013d3fb26f510453da7cd7b1a6" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "945f040d259ca8013d3fb26f510453da7cd7b1a6" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "945f040d259ca8013d3fb26f510453da7cd7b1a6" } curve25519-dalek = "4" datatest-stable = "0.2.3" display-error-chain = "0.2.0" @@ -292,9 +292,9 @@ pretty-hex = "0.4.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index 64b26a83a4..8430dc0731 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -96,7 +96,7 @@ struct JobPollResponse { /// Poll to see if a Pantry background job is done #[endpoint { method = GET, - path = 
"/crucible/pantry/0/job/{id}/is_finished", + path = "/crucible/pantry/0/job/{id}/is-finished", }] async fn is_job_finished( rc: RequestContext>, @@ -139,6 +139,7 @@ async fn job_result_ok( } #[derive(Debug, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] pub enum ExpectedDigest { Sha256(String), } @@ -157,7 +158,7 @@ struct ImportFromUrlResponse { /// Import data from a URL into a volume #[endpoint { method = POST, - path = "/crucible/pantry/0/volume/{id}/import_from_url", + path = "/crucible/pantry/0/volume/{id}/import-from-url", }] async fn import_from_url( rc: RequestContext>, @@ -213,7 +214,7 @@ struct BulkWriteRequest { /// Bulk write data into a volume at a specified offset #[endpoint { method = POST, - path = "/crucible/pantry/0/volume/{id}/bulk_write", + path = "/crucible/pantry/0/volume/{id}/bulk-write", }] async fn bulk_write( rc: RequestContext>, From 8cf3e2a0f9a312e806a49faac8bdacc9eb034a77 Mon Sep 17 00:00:00 2001 From: Adam Leventhal Date: Fri, 1 Dec 2023 15:51:43 -0800 Subject: [PATCH 042/186] move to openapiv3 v2.0.0-rc.1 (#4582) --- Cargo.lock | 39 ++++++++++++++------------------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06c1c2b5b7..329f74bb77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1790,7 +1790,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3 1.0.3", + "openapiv3", "pretty-hex 0.4.0", "schemars", "serde", @@ -1895,7 +1895,7 @@ dependencies = [ "hyper", "indexmap 2.1.0", "multer", - "openapiv3 2.0.0-rc.1", + "openapiv3", "paste", "percent-encoding", "proc-macro2", @@ -3245,7 +3245,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3 1.0.3", + "openapiv3", "schemars", "serde", "serde_json", @@ -4039,7 +4039,7 @@ dependencies = [ "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", - "openapiv3 1.0.3", + "openapiv3", "openssl", "oso", "oximeter", @@ -4565,7 +4565,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "openapi-lint", - "openapiv3 1.0.3", + "openapiv3", "schemars", "serde", "serde_json", @@ -4642,7 +4642,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "openapi-lint", - "openapiv3 1.0.3", + "openapiv3", "openssl", "oxide-client", "oximeter", @@ -4840,7 +4840,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "openapi-lint", - "openapiv3 1.0.3", + "openapiv3", "opte-ioctl", "oximeter", "oximeter-instruments", @@ -4970,7 +4970,7 @@ dependencies = [ "num-iter", "num-traits", "once_cell", - "openapiv3 2.0.0-rc.1", + "openapiv3", "petgraph", "postgres-types", "ppv-lite86", @@ -5066,26 +5066,15 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openapi-lint" version = "0.4.0" -source = "git+https://github.com/oxidecomputer/openapi-lint?branch=main#bb69a3a4a184d966bac2a0df2be5c9038d9867d0" +source = "git+https://github.com/oxidecomputer/openapi-lint?branch=main#ef442ee4343e97b6d9c217d3e7533962fe7d7236" dependencies = [ "heck 0.4.1", "indexmap 2.1.0", "lazy_static", - "openapiv3 1.0.3", + "openapiv3", "regex", ] -[[package]] -name = "openapiv3" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75e56d5c441965b6425165b7e3223cc933ca469834f4a8b4786817a1f9dc4f13" -dependencies = [ - "indexmap 2.1.0", - "serde", - "serde_json", -] - [[package]] name = "openapiv3" version = "2.0.0-rc.1" @@ -5312,7 +5301,7 @@ dependencies = [ 
"omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3 1.0.3", + "openapiv3", "oximeter", "oximeter-client", "oximeter-db", @@ -6079,7 +6068,7 @@ dependencies = [ "heck 0.4.1", "http", "indexmap 2.1.0", - "openapiv3 2.0.0-rc.1", + "openapiv3", "proc-macro2", "quote", "regex", @@ -6097,7 +6086,7 @@ name = "progenitor-macro" version = "0.4.0" source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" dependencies = [ - "openapiv3 2.0.0-rc.1", + "openapiv3", "proc-macro2", "progenitor-impl", "quote", @@ -9549,7 +9538,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3 1.0.3", + "openapiv3", "rand 0.8.5", "reqwest", "schemars", diff --git a/Cargo.toml b/Cargo.toml index 6d47909399..533e710dc2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -263,7 +263,7 @@ oxide-client = { path = "clients/oxide-client" } oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "258a8b59902dd36fc7ee5425e6b1fb5fc80d4649", features = [ "api", "std" ] } once_cell = "1.18.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } -openapiv3 = "1.0" +openapiv3 = "2.0.0-rc.1" # must match samael's crate! openssl = "0.10" openssl-sys = "0.9" From 6f2d1c5b1589295249e7b4548657374bfe11f909 Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Mon, 4 Dec 2023 16:55:29 +0000 Subject: [PATCH 043/186] Improve wtmp record mangling. (#4501) This fixes two problems with the current wtmp/utmp record mangling. First it is not idempotent, the system's notion of boot time is set to the current time of day whenever sled agent confirms that time is synchronised. Secondly, this is only approximate even for the first sled agent start, but it's plain wrong if sled agent restarts later. In conjunction with changes to stlouis, the `tmpx` utility is now able to process all zones itself, and uses the true system boot time for each zone when updating records. 
Fixes: https://github.com/oxidecomputer/omicron/issues/3514 --- sled-agent/src/services.rs | 45 ++++++-------------------- tools/install_builder_prerequisites.sh | 2 ++ 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 88f79e7064..dc309e8423 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -101,13 +101,11 @@ use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::collections::HashSet; -use std::iter; use std::iter::FromIterator; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::str::FromStr; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; use tokio::sync::{oneshot, MutexGuard}; @@ -2931,10 +2929,7 @@ impl ServiceManager { Ok(()) } - pub fn boottime_rewrite<'a>( - &self, - zones: impl Iterator<Item = &'a RunningZone>, - ) { + pub fn boottime_rewrite(&self) { if self .inner .time_synced @@ -2945,33 +2940,13 @@ impl ServiceManager { return; } - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("SystemTime before UNIX EPOCH"); - - info!(self.inner.log, "Setting boot time to {:?}", now); - - let files: Vec<Utf8PathBuf> = zones - .map(|z| z.root()) - .chain(iter::once(Utf8PathBuf::from("/"))) - .flat_map(|r| [r.join("var/adm/utmpx"), r.join("var/adm/wtmpx")]) - .collect(); - - for file in files { - let mut command = std::process::Command::new(PFEXEC); - let cmd = command.args(&[ - "/usr/platform/oxide/bin/tmpx", - &format!("{}", now.as_secs()), - &file.as_str(), - ]); - match execute(cmd) { - Err(e) => { - warn!(self.inner.log, "Updating {} failed: {}", &file, e); - } - Ok(_) => { - info!(self.inner.log, "Updated {}", &file); - } - } + // Call out to the 'tmpx' utility program which will rewrite the wtmpx + // and utmpx databases in every zone, including the global zone, to + // reflect the adjusted system boot time. + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&["/usr/platform/oxide/bin/tmpx", "-Z"]); + if let Err(e) = execute(cmd) { + warn!(self.inner.log, "Updating [wu]tmpx databases failed: {}", e); } } @@ -2980,7 +2955,7 @@ impl ServiceManager { if let Some(true) = self.inner.skip_timesync { info!(self.inner.log, "Configured to skip timesync checks"); - self.boottime_rewrite(existing_zones.values()); + self.boottime_rewrite(); return Ok(TimeSync { sync: true, ref_id: 0, @@ -3034,7 +3009,7 @@ impl ServiceManager { && correction.abs() <= 0.05; if sync { - self.boottime_rewrite(existing_zones.values()); + self.boottime_rewrite(); } Ok(TimeSync { diff --git a/tools/install_builder_prerequisites.sh b/tools/install_builder_prerequisites.sh index d3ecd8eaa8..1ce133dff3 100755 --- a/tools/install_builder_prerequisites.sh +++ b/tools/install_builder_prerequisites.sh @@ -131,6 +131,8 @@ function install_packages { "library/libxmlsec1" # "bindgen leverages libclang to preprocess, parse, and type check C and C++ header files." "pkg:/ooce/developer/clang-$CLANGVER" + "system/library/gcc-runtime" + "system/library/g++-runtime" ) # Install/update the set of packages. From 15115a461c1df915bd9b6f2e71855c126d8e4f6c Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 4 Dec 2023 12:33:07 -0800 Subject: [PATCH 044/186] [easy] [update-engine] change an info to a debug (#4603) Saw `wicket rack-update` print out this info message even though the group displayer would show a corresponding message anyway.
--- update-engine/src/display/group_display.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update-engine/src/display/group_display.rs b/update-engine/src/display/group_display.rs index cfd37aac16..0e04361ce4 100644 --- a/update-engine/src/display/group_display.rs +++ b/update-engine/src/display/group_display.rs @@ -153,7 +153,7 @@ impl GroupDisplay { self.stats.apply_result(result); if result.before != result.after { - slog::info!( + slog::debug!( self.log, "add_event_report caused state transition"; "prefix" => &state.prefix, From c915eeb77fc153376a0734f712deaf1525a391f1 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 4 Dec 2023 16:21:08 -0500 Subject: [PATCH 045/186] Detect and un-delete phantom disks (#4547) A "phantom" disk is a disk where the disk delete saga has run for it but unwound: this leaves that disk soft-deleted, but the resources and accounting for that disk remain. Users cannot request that the disk be deleted again, and it remains a phantom. There are two fixes for this: 1. Change the disk delete saga to undo the disk's soft delete and set the disk to faulted during an unwind. This way, users can request that disks be deleted repeatedly until it works. 2. Create a background task that detects these phantom disks and does the same thing: un-delete them and set them to faulted. This requires adding an index on `id` to the `disk` table, so the schema is bumped to 18.0.0. Fixes oxidecomputer/customer-support#58. --- common/src/nexus_config.rs | 24 +- dev-tools/omdb/src/bin/omdb/nexus.rs | 26 ++ dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 12 + nexus/db-model/src/schema.rs | 6 +- nexus/db-queries/src/db/datastore/disk.rs | 290 +++++++++++++++++++++- nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 23 +- nexus/src/app/background/mod.rs | 1 + nexus/src/app/background/phantom_disks.rs | 104 ++++++++ nexus/src/app/sagas/disk_delete.rs | 19 +- nexus/tests/config.test.toml | 1 + nexus/tests/integration_tests/disks.rs | 132 ++++++++++ schema/crdb/18.0.0/up01.sql | 4 + schema/crdb/dbinit.sql | 7 +++++- sled-agent/src/sim/storage.rs | 14 ++ smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 18 files changed, 667 insertions(+), 11 deletions(-) create mode 100644 nexus/src/app/background/phantom_disks.rs create mode 100644 schema/crdb/18.0.0/up01.sql diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 94c39b4436..740823e755 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -339,6 +339,8 @@ pub struct BackgroundTaskConfig { pub nat_cleanup: NatCleanupConfig, /// configuration for inventory tasks pub inventory: InventoryConfig, + /// configuration for phantom disks task + pub phantom_disks: PhantomDiskConfig, } #[serde_as] @@ -386,7 +388,7 @@ pub struct InventoryConfig { /// period (in seconds) for periodic activations of this background task /// - /// Each activation fetches information about all harware and software in + /// Each activation fetches information about all hardware and software in /// the system and inserts it into the database. This generates a moderate /// amount of data.
#[serde_as(as = "DurationSeconds<u64>")] @@ -405,6 +407,14 @@ pub struct InventoryConfig { pub disable: bool, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct PhantomDiskConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds<u64>")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -508,8 +518,9 @@ mod test { BackgroundTaskConfig, Config, ConfigDropshotWithTls, ConsoleConfig, Database, DeploymentConfig, DnsTasksConfig, DpdConfig, ExternalEndpointsConfig, InternalDns, InventoryConfig, LoadError, - LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig, SchemeName, - TimeseriesDbConfig, Tunables, UpdatesConfig, + LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig, + PhantomDiskConfig, SchemeName, TimeseriesDbConfig, Tunables, + UpdatesConfig, }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; use crate::api::internal::shared::SwitchLocation; @@ -663,6 +674,7 @@ mod test { inventory.period_secs = 10 inventory.nkeep = 11 inventory.disable = false + phantom_disks.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -764,7 +776,10 @@ mod test { period_secs: Duration::from_secs(10), nkeep: 11, disable: false, - } + }, + phantom_disks: PhantomDiskConfig { + period_secs: Duration::from_secs(30), + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -822,6 +837,7 @@ mod test { inventory.period_secs = 10 inventory.nkeep = 3 inventory.disable = false + phantom_disks.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 9f91d38504..df5248b52d 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -515,6 +515,32 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { ); } }; + } else if name == "phantom_disks" { + #[derive(Deserialize)] + struct TaskSuccess { + /// how many phantom disks were deleted ok + phantom_disk_deleted_ok: usize, + + /// how many phantom disks could not be deleted + phantom_disk_deleted_err: usize, + } + + match serde_json::from_value::<TaskSuccess>(details.clone()) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + Ok(success) => { + println!( + " number of phantom disks deleted: {}", + success.phantom_disk_deleted_ok + ); + println!( + " number of phantom disk delete errors: {}", + success.phantom_disk_deleted_err + ); + } + }; } else { println!( "warning: unknown background task: {:?} \ diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index fd50d80c81..c08f592852 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -66,6 +66,10 @@ task: "nat_v4_garbage_collector" predetermined retention policy +task: "phantom_disks" + detects and un-deletes phantom disks + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -131,6 +135,10 @@ task: "nat_v4_garbage_collector" predetermined retention policy +task: "phantom_disks" + detects and un-deletes phantom disks + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS.
@@ -183,6 +191,10 @@ task: "nat_v4_garbage_collector" predetermined retention policy +task: "phantom_disks" + detects and un-deletes phantom disks + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 6bc3a85e8a..65520ab59c 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -260,6 +260,10 @@ task: "nat_v4_garbage_collector" predetermined retention policy +task: "phantom_disks" + detects and un-deletes phantom disks + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -357,6 +361,14 @@ task: "inventory_collection" last collection started: last collection done: +task: "phantom_disks" + configured period: every 30s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms + number of phantom disks deleted: 0 + number of phantom disk delete errors: 0 + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index be345032ac..373785799e 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1301,7 +1301,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(17, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(18, 0, 0); allow_tables_to_appear_in_same_query!( system_update, @@ -1370,3 +1370,7 @@ allow_tables_to_appear_in_same_query!( switch_port, switch_port_settings_bgp_peer_config ); + +allow_tables_to_appear_in_same_query!(disk, virtual_provisioning_resource); + +allow_tables_to_appear_in_same_query!(volume, virtual_provisioning_resource); diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index a0d9bf12c3..26d439b350 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -25,11 +25,14 @@ use crate::db::model::DiskUpdate; use crate::db::model::Instance; use crate::db::model::Name; use crate::db::model::Project; +use crate::db::model::VirtualProvisioningResource; +use crate::db::model::Volume; use crate::db::pagination::paginated; use crate::db::queries::disk::DiskSetClauseForAttach; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::DateTime; use chrono::Utc; use diesel::prelude::*; use omicron_common::api; @@ -564,7 +567,7 @@ impl DataStore { /// Updates a disk record to indicate it has been deleted. /// - /// Returns the volume ID of associated with the deleted disk. + /// Returns the disk before any modifications are made by this function. /// /// Does not attempt to modify any resources (e.g. regions) which may /// belong to the disk. @@ -652,4 +655,289 @@ impl DataStore { } } } + + /// Set a disk to faulted and un-delete it + /// + /// If the disk delete saga unwinds, then the disk should _not_ remain + /// deleted: disk delete saga should be triggered again in order to fully + /// complete, and the only way to do that is to un-delete the disk. Set it + /// to faulted to ensure that it won't be used. 
+ pub async fn project_undelete_disk_set_faulted_no_auth( + &self, + disk_id: &Uuid, + ) -> Result<(), Error> { + use db::schema::disk::dsl; + let conn = self.pool_connection_unauthorized().await?; + + let faulted = api::external::DiskState::Faulted.label(); + + let result = diesel::update(dsl::disk) + .filter(dsl::time_deleted.is_not_null()) + .filter(dsl::id.eq(*disk_id)) + .set(( + dsl::time_deleted.eq(None::<DateTime<Utc>>), + dsl::disk_state.eq(faulted), + )) + .check_if_exists::<Disk>(*disk_id) + .execute_and_check(&conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Disk, + LookupType::ById(*disk_id), + ), + ) + })?; + + match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let disk = result.found; + let disk_state = disk.state(); + + if disk.time_deleted().is_none() + && disk_state.state() == &api::external::DiskState::Faulted + { + // To maintain idempotency, if the disk has already been + // faulted, don't throw an error. + return Ok(()); + } else { + // NOTE: This is a "catch-all" error case, more specific + // errors should be preferred as they're more actionable. + return Err(Error::InternalError { + internal_message: String::from( + "disk exists, but cannot be faulted", + ), + }); + } + } + } + } + + /// Find disks that have been deleted but still have a + /// `virtual_provisioning_resource` record: this indicates that a disk + /// delete saga partially succeeded, then unwound, which (before the fixes + /// in customer-support#58) would mean the disk was deleted but the project + /// it was in could not be deleted (due to an erroneous number of bytes + /// "still provisioned"). + pub async fn find_phantom_disks(&self) -> ListResultVec<Disk> { + use db::schema::disk::dsl; + use db::schema::virtual_provisioning_resource::dsl as resource_dsl; + use db::schema::volume::dsl as volume_dsl; + + let conn = self.pool_connection_unauthorized().await?; + + let potential_phantom_disks: Vec<( + Disk, + Option<VirtualProvisioningResource>, + Option<Volume>, + )> = dsl::disk + .filter(dsl::time_deleted.is_not_null()) + .left_join( + resource_dsl::virtual_provisioning_resource + .on(resource_dsl::id.eq(dsl::id)), + ) + .left_join(volume_dsl::volume.on(dsl::volume_id.eq(volume_dsl::id))) + .select(( + Disk::as_select(), + Option::<VirtualProvisioningResource>::as_select(), + Option::<Volume>::as_select(), + )) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // The first forward steps of the disk delete saga (plus the volume + // delete sub saga) are as follows: + // + // 1. soft-delete the disk + // 2. call virtual_provisioning_collection_delete_disk + // 3. soft-delete the disk's volume + // + // Before the fixes as part of customer-support#58, steps 1 and 3 did + // not have undo steps, where step 2 did. In order to detect when the + // disk delete saga unwound, find entries where + // + // 1. the disk and volume are soft-deleted + // 2. the `virtual_provisioning_resource` exists + // + // It's important not to conflict with any currently running disk delete + // saga. + + Ok(potential_phantom_disks + .into_iter() + .filter(|(disk, resource, volume)| { + if let Some(volume) = volume { + // In this branch, the volume record exists. Because it was + // returned by the query above, if it is soft-deleted we + // then know the saga unwound before the volume record could + // be hard deleted.
This won't conflict with a running disk + // delete saga, because the resource record should be None + // if the disk and volume were already soft deleted (if + // there is one, the saga will be at or past step 3). + disk.time_deleted().is_some() + && volume.time_deleted.is_some() + && resource.is_some() + } else { + // In this branch, the volume record was hard-deleted. The + // saga could still have unwound after hard deleting the + // volume record, so proceed with filtering. This won't + // conflict with a running disk delete saga because the + // resource record should be None if the disk was soft + // deleted and the volume was hard deleted (if there is one, + // the saga should be almost finished as the volume hard + // delete is the last thing it does). + disk.time_deleted().is_some() && resource.is_some() + } + }) + .map(|(disk, _, _)| disk) + .collect()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::db::datastore::datastore_test; + use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::params; + use omicron_common::api::external; + use omicron_test_utils::dev; + + #[tokio::test] + async fn test_undelete_disk_set_faulted_idempotent() { + let logctx = + dev::test_setup_log("test_undelete_disk_set_faulted_idempotent"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let (opctx, db_datastore) = datastore_test(&logctx, &db).await; + + let silo_id = opctx.authn.actor().unwrap().silo_id().unwrap(); + + let (authz_project, _db_project) = db_datastore + .project_create( + &opctx, + Project::new( + silo_id, + params::ProjectCreate { + identity: external::IdentityMetadataCreateParams { + name: "testpost".parse().unwrap(), + description: "please ignore".to_string(), + }, + }, + ), + ) + .await + .unwrap(); + + let disk = db_datastore + .project_create_disk( + &opctx, + &authz_project, + Disk::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::DiskCreate { + identity: external::IdentityMetadataCreateParams { + name: "first-post".parse().unwrap(), + description: "just trying things out".to_string(), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512) + .unwrap(), + }, + size: external::ByteCount::from(2147483648), + }, + db::model::BlockSize::Traditional, + DiskRuntimeState::new(), + ) + .unwrap(), + ) + .await + .unwrap(); + + let (.., authz_disk, db_disk) = LookupPath::new(&opctx, &db_datastore) + .disk_id(disk.id()) + .fetch() + .await + .unwrap(); + + db_datastore + .disk_update_runtime( + &opctx, + &authz_disk, + &db_disk.runtime().detach(), + ) + .await + .unwrap(); + + db_datastore + .project_delete_disk_no_auth( + &authz_disk.id(), + &[external::DiskState::Detached], + ) + .await + .unwrap(); + + // Assert initial state - deleting the Disk will make LookupPath::fetch + // not work. 
+ { + LookupPath::new(&opctx, &db_datastore) + .disk_id(disk.id()) + .fetch() + .await + .unwrap_err(); + } + + // Function under test: call this twice to ensure it's idempotent + + db_datastore + .project_undelete_disk_set_faulted_no_auth(&authz_disk.id()) + .await + .unwrap(); + + // Assert state change + + { + let (.., db_disk) = LookupPath::new(&opctx, &db_datastore) + .disk_id(disk.id()) + .fetch() + .await + .unwrap(); + + assert!(db_disk.time_deleted().is_none()); + assert_eq!( + db_disk.runtime().disk_state, + external::DiskState::Faulted.label().to_string() + ); + } + + db_datastore + .project_undelete_disk_set_faulted_no_auth(&authz_disk.id()) + .await + .unwrap(); + + // Assert state is the same after the second call + + { + let (.., db_disk) = LookupPath::new(&opctx, &db_datastore) + .disk_id(disk.id()) + .fetch() + .await + .unwrap(); + + assert!(db_disk.time_deleted().is_none()); + assert_eq!( + db_disk.runtime().disk_state, + external::DiskState::Faulted.label().to_string() + ); + } + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 3679fa8196..9d6bf2d22f 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -100,6 +100,7 @@ inventory.period_secs = 600 inventory.nkeep = 5 # Disable inventory collection altogether (for emergencies) inventory.disable = false +phantom_disks.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index d27248ffdc..cfa023a013 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -11,6 +11,7 @@ use super::dns_servers; use super::external_endpoints; use super::inventory_collection; use super::nat_cleanup; +use super::phantom_disks; use nexus_db_model::DnsGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; @@ -52,6 +53,9 @@ pub struct BackgroundTasks { /// task handle for the task that collects inventory pub task_inventory_collection: common::TaskHandle, + + /// task handle for the task that detects phantom disks + pub task_phantom_disks: common::TaskHandle, } impl BackgroundTasks { @@ -122,7 +126,7 @@ impl BackgroundTasks { // Background task: inventory collector let task_inventory_collection = { let collector = inventory_collection::InventoryCollector::new( - datastore, + datastore.clone(), resolver, &nexus_id.to_string(), config.inventory.nkeep, @@ -143,6 +147,22 @@ impl BackgroundTasks { task }; + // Background task: phantom disk detection + let task_phantom_disks = { + let detector = phantom_disks::PhantomDiskDetector::new(datastore); + + let task = driver.register( + String::from("phantom_disks"), + String::from("detects and un-deletes phantom disks"), + config.phantom_disks.period_secs, + Box::new(detector), + opctx.child(BTreeMap::new()), + vec![], + ); + + task + }; + BackgroundTasks { driver, task_internal_dns_config, @@ -153,6 +173,7 @@ impl BackgroundTasks { external_endpoints, nat_cleanup, task_inventory_collection, + task_phantom_disks, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 954207cb3c..70b20224d4 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -12,6 +12,7 @@ mod external_endpoints; mod init; mod inventory_collection; mod nat_cleanup; +mod phantom_disks; mod status; pub use common::Driver; diff --git 
a/nexus/src/app/background/phantom_disks.rs b/nexus/src/app/background/phantom_disks.rs new file mode 100644 index 0000000000..b038d70ac6 --- /dev/null +++ b/nexus/src/app/background/phantom_disks.rs @@ -0,0 +1,104 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting and un-deleting phantom disks +//! +//! A "phantom" disk is one where a disk delete saga partially completed but +//! unwound: before a fix for customer-support#58, this would leave disks +//! deleted but would also leave a `virtual_provisioning_resource` record for +//! that disk. There would be no way to re-trigger the disk delete saga as the +//! disk was deleted, so the project that disk was in could not be deleted +//! because associated virtual provisioning resources were still being consumed. +//! +//! The fix for customer-support#58 changes the disk delete saga's unwind to +//! also un-delete the disk and set it to faulted. This enables it to be deleted +//! again. Correcting the disk delete saga's unwind means that phantom disks +//! will not be created in the future when the disk delete saga unwinds, but +//! this background task is required to apply the same fix for disks that are +//! already in this phantom state. + +use super::common::BackgroundTask; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use serde_json::json; +use std::sync::Arc; + +pub struct PhantomDiskDetector { + datastore: Arc<DataStore>, +} + +impl PhantomDiskDetector { + pub fn new(datastore: Arc<DataStore>) -> Self { + PhantomDiskDetector { datastore } + } +} + +impl BackgroundTask for PhantomDiskDetector { + fn activate<'a, 'b, 'c>( + &'a mut self, + opctx: &'b OpContext, + ) -> BoxFuture<'c, serde_json::Value> + where + 'a: 'c, + 'b: 'c, + { + async { + let log = &opctx.log; + warn!(&log, "phantom disk task started"); + + let phantom_disks = match self.datastore.find_phantom_disks().await + { + Ok(phantom_disks) => phantom_disks, + Err(e) => { + warn!(&log, "error from find_phantom_disks: {:?}", e); + return json!({ + "error": + format!("failed find_phantom_disks: {:#}", e) + }); + } + }; + + let mut phantom_disk_deleted_ok = 0; + let mut phantom_disk_deleted_err = 0; + + for disk in phantom_disks { + warn!(&log, "phantom disk {} found!", disk.id()); + + // If a phantom disk is found, then un-delete it and set it to + // faulted: this will allow a user to request deleting it again. + + let result = self + .datastore + .project_undelete_disk_set_faulted_no_auth(&disk.id()) + .await; + + if let Err(e) = result { + error!( + &log, + "error un-deleting disk {} and setting to faulted: {:#}", + disk.id(), + e, + ); + phantom_disk_deleted_err += 1; + } else { + info!( + &log, + "phantom disk {} un-deleted and set to faulted ok", + disk.id(), + ); + phantom_disk_deleted_ok += 1; + } + } + + warn!(&log, "phantom disk task done"); + json!({ + "phantom_disk_deleted_ok": phantom_disk_deleted_ok, + "phantom_disk_deleted_err": phantom_disk_deleted_err, + }) + } + .boxed() + } +} diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index f2d80d64f5..8f6d74da0a 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -32,10 +32,8 @@ pub(crate) struct Params { declare_saga_actions!
{ disk_delete; DELETE_DISK_RECORD -> "deleted_disk" { - // TODO: See the comment on the "DeleteRegions" step, - // we may want to un-delete the disk if we cannot remove - // underlying regions. + sdd_delete_disk_record + - sdd_delete_disk_record_undo } SPACE_ACCOUNT -> "no_result1" { + sdd_account_space @@ -117,6 +115,21 @@ async fn sdd_delete_disk_record( Ok(disk) } +async fn sdd_delete_disk_record_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::<Params>()?; + + osagactx + .datastore() + .project_undelete_disk_set_faulted_no_auth(&params.disk_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + async fn sdd_account_space( sagactx: NexusActionContext, ) -> Result<(), ActionError> { diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index fbed9aed8e..a4436234f0 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -98,6 +98,7 @@ inventory.period_secs = 600 inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false +phantom_disks.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index a5a8339c34..f7403275b1 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1241,6 +1241,138 @@ async fn test_disk_virtual_provisioning_collection( ); } +#[nexus_test] +async fn test_disk_virtual_provisioning_collection_failed_delete( + cptestctx: &ControlPlaneTestContext, +) { + // Confirm that there's no panic deleting a project if a disk deletion fails + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + + let disk_test = DiskTest::new(&cptestctx).await; + + populate_ip_pool(&client, "default", None).await; + let project_id1 = create_project(client, PROJECT_NAME).await.identity.id; + + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create a 1 GB disk + let disk_size = ByteCount::from_gibibytes_u32(1); + let disks_url = get_disks_url(); + let disk_one = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "disk-one".parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&disk_one)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure creating 1 GiB disk"); + + // Assert correct virtual provisioning collection numbers + let virtual_provisioning_collection = datastore + .virtual_provisioning_collection_get(&opctx, project_id1) + .await + .unwrap(); + assert_eq!( + virtual_provisioning_collection.virtual_disk_bytes_provisioned.0, + disk_size + ); + + // Set the third agent to fail when deleting regions + let zpool = &disk_test.zpools[2]; + let dataset = &zpool.datasets[0]; + disk_test + .sled_agent + .get_crucible_dataset(zpool.id, dataset.id) + .await + .set_region_deletion_error(true) + .await; + + // Delete the disk - expect this to fail + let disk_url = format!("/v1/disks/{}?project={}", "disk-one", PROJECT_NAME); + + NexusRequest::new( +
RequestBuilder::new(client, Method::DELETE, &disk_url) + .expect_status(Some(StatusCode::INTERNAL_SERVER_ERROR)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected success deleting 1 GiB disk"); + + // The virtual provisioning collection numbers haven't changed + let virtual_provisioning_collection = datastore + .virtual_provisioning_collection_get(&opctx, project_id1) + .await + .unwrap(); + assert_eq!( + virtual_provisioning_collection.virtual_disk_bytes_provisioned.0, + disk_size + ); + + // And the disk is now faulted + let disk = disk_get(&client, &disk_url).await; + assert_eq!(disk.state, DiskState::Faulted); + + // Set the third agent to respond normally + disk_test + .sled_agent + .get_crucible_dataset(zpool.id, dataset.id) + .await + .set_region_deletion_error(false) + .await; + + // Request disk delete again + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, &disk_url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure deleting 1 GiB disk"); + + // Delete the project's default VPC subnet and VPC + let subnet_url = + format!("/v1/vpc-subnets/default?project={}&vpc=default", PROJECT_NAME); + NexusRequest::object_delete(&client, &subnet_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to make request"); + + let vpc_url = format!("/v1/vpcs/default?project={}", PROJECT_NAME); + NexusRequest::object_delete(&client, &vpc_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to make request"); + + // The project can be deleted now + let url = format!("/v1/projects/{}", PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, &url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure deleting project"); +} + // Test disk size accounting #[nexus_test] async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { diff --git a/schema/crdb/18.0.0/up01.sql b/schema/crdb/18.0.0/up01.sql new file mode 100644 index 0000000000..018bb36dcb --- /dev/null +++ b/schema/crdb/18.0.0/up01.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_deleted_disk ON omicron.public.disk ( + id +) WHERE + time_deleted IS NOT NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index f4caa2a4e6..f82829a2d9 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1026,6 +1026,11 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_disk_by_instance ON omicron.public.disk ) WHERE time_deleted IS NULL AND attach_instance_id IS NOT NULL; +CREATE UNIQUE INDEX IF NOT EXISTS lookup_deleted_disk ON omicron.public.disk ( + id +) WHERE + time_deleted IS NOT NULL; + CREATE TABLE IF NOT EXISTS omicron.public.image ( /* Identity metadata (resource) */ id UUID PRIMARY KEY, @@ -3009,7 +3014,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '17.0.0', NULL) + ( TRUE, NOW(), NOW(), '18.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 2528a258d7..101228934d 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -40,6 +40,7 @@ struct CrucibleDataInner { running_snapshots: HashMap<Uuid, HashMap<String, RunningSnapshot>>, on_create: Option<CreateCallback>, region_creation_error: bool, + region_deletion_error: bool, creating_a_running_snapshot_should_fail: bool, next_port: u16, } @@
-53,6 +54,7 @@ impl CrucibleDataInner { running_snapshots: HashMap::new(), on_create: None, region_creation_error: false, + region_deletion_error: false, creating_a_running_snapshot_should_fail: false, next_port: crucible_port, } @@ -129,6 +131,10 @@ impl CrucibleDataInner { ); } + if self.region_deletion_error { + bail!("region deletion error!"); + } + let id = Uuid::from_str(&id.0).unwrap(); if let Some(region) = self.regions.get_mut(&id) { if region.state == State::Failed { @@ -229,6 +235,10 @@ impl CrucibleDataInner { self.region_creation_error = value; } + fn set_region_deletion_error(&mut self, value: bool) { + self.region_deletion_error = value; + } + fn create_running_snapshot( &mut self, id: &RegionId, @@ -391,6 +401,10 @@ impl CrucibleData { self.inner.lock().await.set_region_creation_error(value); } + pub async fn set_region_deletion_error(&self, value: bool) { + self.inner.lock().await.set_region_deletion_error(value); + } + pub async fn create_running_snapshot( &self, id: &RegionId, diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 94c8f5572e..d330f32ab6 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -46,6 +46,7 @@ inventory.period_secs = 600 inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false +phantom_disks.period_secs = 30 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index fcaa6176a8..cbd4851613 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -46,6 +46,7 @@ inventory.period_secs = 600 inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false +phantom_disks.period_secs = 30 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds. From 9b666e73dec06f2a645a714cdfbb21d63a2f3504 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Mon, 4 Dec 2023 13:42:45 -0800 Subject: [PATCH 046/186] Support missing samples in oximeter (#4552) - Add a `Datum::Missing` and `MissingDatum`, which records the intended datum type and an optional start time for a sample which could not be produced. - Database upgrades which make all scalar datum columns Nullable. Array fields are _not_ made Nullable, since ClickHouse doesn't support composite types like arrays inside a Nullable wrapper type. The empty array is used as a sentinel, which is OK since we can't have zero-length array histograms in Oximeter. Add a test which will fail if we ever change that. - Rework database serialization to handle Nullable types or empty arrays. This uses a new helper trait to convert a NULL (which has no type information) to the intended datum type, or an empty array to a histogram. - Add a test for each measurement type that we can recover a missing sample of that type -- NULLs for scalar values and empty arrays for histograms. 
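As a rough sketch of that recovery rule (illustrative types only; the real `Datum` and `MissingDatum` definitions in oximeter carry more detail, such as the optional start time):

    // Hypothetical, simplified model of datum recovery, not the actual
    // oximeter types. Scalar columns become Nullable, so NULL decodes to
    // a typed "missing" datum; histogram columns keep their array type
    // and use the empty array as the missing-value sentinel, since
    // ClickHouse cannot wrap arrays in Nullable.
    enum SketchDatum {
        U64(u64),
        HistogramU64 { bins: Vec<u64>, counts: Vec<u64> },
        Missing { datum_type: &'static str },
    }

    fn scalar_from_db(raw: Option<u64>) -> SketchDatum {
        match raw {
            Some(value) => SketchDatum::U64(value),
            // NULL carries no type information, so the intended datum
            // type must come from the timeseries schema.
            None => SketchDatum::Missing { datum_type: "u64" },
        }
    }

    fn histogram_from_db(bins: Vec<u64>, counts: Vec<u64>) -> SketchDatum {
        // A zero-bin histogram is not a legal oximeter value, so the
        // empty array can safely mean "missing".
        if bins.is_empty() {
            SketchDatum::Missing { datum_type: "histogram_u64" }
        } else {
            SketchDatum::HistogramU64 { bins, counts }
        }
    }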
--- Cargo.lock | 1 + openapi/nexus-internal.json | 119 ++++ openapi/nexus.json | 67 +++ openapi/sled-agent.json | 119 ++++ oximeter/db/notes.txt | 232 -------- oximeter/db/schema/replicated/4/up01.sql | 1 + oximeter/db/schema/replicated/4/up02.sql | 1 + oximeter/db/schema/replicated/4/up03.sql | 1 + oximeter/db/schema/replicated/4/up04.sql | 1 + oximeter/db/schema/replicated/4/up05.sql | 1 + oximeter/db/schema/replicated/4/up06.sql | 1 + oximeter/db/schema/replicated/4/up07.sql | 1 + oximeter/db/schema/replicated/4/up08.sql | 1 + oximeter/db/schema/replicated/4/up09.sql | 1 + oximeter/db/schema/replicated/4/up10.sql | 1 + oximeter/db/schema/replicated/4/up11.sql | 1 + oximeter/db/schema/replicated/4/up12.sql | 1 + oximeter/db/schema/replicated/4/up13.sql | 1 + oximeter/db/schema/replicated/4/up14.sql | 1 + oximeter/db/schema/replicated/4/up15.sql | 1 + oximeter/db/schema/replicated/4/up16.sql | 1 + oximeter/db/schema/replicated/4/up17.sql | 1 + oximeter/db/schema/replicated/4/up18.sql | 1 + oximeter/db/schema/replicated/4/up19.sql | 1 + oximeter/db/schema/replicated/4/up20.sql | 1 + oximeter/db/schema/replicated/4/up21.sql | 1 + oximeter/db/schema/replicated/4/up22.sql | 1 + oximeter/db/schema/replicated/4/up23.sql | 1 + oximeter/db/schema/replicated/4/up24.sql | 1 + oximeter/db/schema/replicated/4/up25.sql | 1 + oximeter/db/schema/replicated/4/up26.sql | 1 + oximeter/db/schema/replicated/4/up27.sql | 1 + oximeter/db/schema/replicated/4/up28.sql | 1 + oximeter/db/schema/replicated/4/up29.sql | 1 + oximeter/db/schema/replicated/4/up30.sql | 1 + oximeter/db/schema/replicated/4/up31.sql | 1 + oximeter/db/schema/replicated/4/up32.sql | 1 + oximeter/db/schema/replicated/db-init.sql | 64 +-- oximeter/db/schema/single-node/4/up01.sql | 9 + oximeter/db/schema/single-node/4/up02.sql | 1 + oximeter/db/schema/single-node/4/up03.sql | 1 + oximeter/db/schema/single-node/4/up04.sql | 1 + oximeter/db/schema/single-node/4/up05.sql | 1 + oximeter/db/schema/single-node/4/up06.sql | 1 + oximeter/db/schema/single-node/4/up07.sql | 1 + oximeter/db/schema/single-node/4/up08.sql | 1 + oximeter/db/schema/single-node/4/up09.sql | 1 + oximeter/db/schema/single-node/4/up10.sql | 1 + oximeter/db/schema/single-node/4/up11.sql | 1 + oximeter/db/schema/single-node/4/up12.sql | 1 + oximeter/db/schema/single-node/4/up13.sql | 1 + oximeter/db/schema/single-node/4/up14.sql | 1 + oximeter/db/schema/single-node/4/up15.sql | 1 + oximeter/db/schema/single-node/4/up16.sql | 1 + oximeter/db/schema/single-node/db-init.sql | 49 +- oximeter/db/src/client.rs | 195 ++++--- oximeter/db/src/model.rs | 636 ++++++++++++++++++--- oximeter/oximeter/Cargo.toml | 1 + oximeter/oximeter/src/histogram.rs | 13 +- oximeter/oximeter/src/test_util.rs | 12 +- oximeter/oximeter/src/traits.rs | 36 +- oximeter/oximeter/src/types.rs | 105 +++- 62 files changed, 1255 insertions(+), 450 deletions(-) delete mode 100644 oximeter/db/notes.txt create mode 100644 oximeter/db/schema/replicated/4/up01.sql create mode 100644 oximeter/db/schema/replicated/4/up02.sql create mode 100644 oximeter/db/schema/replicated/4/up03.sql create mode 100644 oximeter/db/schema/replicated/4/up04.sql create mode 100644 oximeter/db/schema/replicated/4/up05.sql create mode 100644 oximeter/db/schema/replicated/4/up06.sql create mode 100644 oximeter/db/schema/replicated/4/up07.sql create mode 100644 oximeter/db/schema/replicated/4/up08.sql create mode 100644 oximeter/db/schema/replicated/4/up09.sql create mode 100644 oximeter/db/schema/replicated/4/up10.sql create mode 100644 
oximeter/db/schema/replicated/4/up11.sql create mode 100644 oximeter/db/schema/replicated/4/up12.sql create mode 100644 oximeter/db/schema/replicated/4/up13.sql create mode 100644 oximeter/db/schema/replicated/4/up14.sql create mode 100644 oximeter/db/schema/replicated/4/up15.sql create mode 100644 oximeter/db/schema/replicated/4/up16.sql create mode 100644 oximeter/db/schema/replicated/4/up17.sql create mode 100644 oximeter/db/schema/replicated/4/up18.sql create mode 100644 oximeter/db/schema/replicated/4/up19.sql create mode 100644 oximeter/db/schema/replicated/4/up20.sql create mode 100644 oximeter/db/schema/replicated/4/up21.sql create mode 100644 oximeter/db/schema/replicated/4/up22.sql create mode 100644 oximeter/db/schema/replicated/4/up23.sql create mode 100644 oximeter/db/schema/replicated/4/up24.sql create mode 100644 oximeter/db/schema/replicated/4/up25.sql create mode 100644 oximeter/db/schema/replicated/4/up26.sql create mode 100644 oximeter/db/schema/replicated/4/up27.sql create mode 100644 oximeter/db/schema/replicated/4/up28.sql create mode 100644 oximeter/db/schema/replicated/4/up29.sql create mode 100644 oximeter/db/schema/replicated/4/up30.sql create mode 100644 oximeter/db/schema/replicated/4/up31.sql create mode 100644 oximeter/db/schema/replicated/4/up32.sql create mode 100644 oximeter/db/schema/single-node/4/up01.sql create mode 100644 oximeter/db/schema/single-node/4/up02.sql create mode 100644 oximeter/db/schema/single-node/4/up03.sql create mode 100644 oximeter/db/schema/single-node/4/up04.sql create mode 100644 oximeter/db/schema/single-node/4/up05.sql create mode 100644 oximeter/db/schema/single-node/4/up06.sql create mode 100644 oximeter/db/schema/single-node/4/up07.sql create mode 100644 oximeter/db/schema/single-node/4/up08.sql create mode 100644 oximeter/db/schema/single-node/4/up09.sql create mode 100644 oximeter/db/schema/single-node/4/up10.sql create mode 100644 oximeter/db/schema/single-node/4/up11.sql create mode 100644 oximeter/db/schema/single-node/4/up12.sql create mode 100644 oximeter/db/schema/single-node/4/up13.sql create mode 100644 oximeter/db/schema/single-node/4/up14.sql create mode 100644 oximeter/db/schema/single-node/4/up15.sql create mode 100644 oximeter/db/schema/single-node/4/up16.sql diff --git a/Cargo.lock b/Cargo.lock index 329f74bb77..28d3015025 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5261,6 +5261,7 @@ dependencies = [ "rstest", "schemars", "serde", + "serde_json", "strum", "thiserror", "trybuild", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 7785d232d9..caf1414f53 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2568,9 +2568,60 @@ "datum", "type" ] + }, + { + "type": "object", + "properties": { + "datum": { + "$ref": "#/components/schemas/MissingDatum" + }, + "type": { + "type": "string", + "enum": [ + "missing" + ] + } + }, + "required": [ + "datum", + "type" + ] } ] }, + "DatumType": { + "description": "The type of an individual datum of a metric.", + "type": "string", + "enum": [ + "bool", + "i8", + "u8", + "i16", + "u16", + "i32", + "u32", + "i64", + "u64", + "f32", + "f64", + "string", + "bytes", + "cumulative_i64", + "cumulative_u64", + "cumulative_f32", + "cumulative_f64", + "histogram_i8", + "histogram_u8", + "histogram_i16", + "histogram_u16", + "histogram_i32", + "histogram_u32", + "histogram_i64", + "histogram_u64", + "histogram_f32", + "histogram_f64" + ] + }, "DiskRuntimeState": { "description": "Runtime state of the Disk, which includes its attach state 
and some minimal metadata", "type": "object", @@ -4128,9 +4179,77 @@ "content", "type" ] + }, + { + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + } + }, + "required": [ + "datum_type" + ] + }, + "type": { + "type": "string", + "enum": [ + "missing_datum_requires_start_time" + ] + } + }, + "required": [ + "content", + "type" + ] + }, + { + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + } + }, + "required": [ + "datum_type" + ] + }, + "type": { + "type": "string", + "enum": [ + "missing_datum_cannot_have_start_time" + ] + } + }, + "required": [ + "content", + "type" + ] } ] }, + "MissingDatum": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + }, + "start_time": { + "nullable": true, + "type": "string", + "format": "date-time" + } + }, + "required": [ + "datum_type" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", diff --git a/openapi/nexus.json b/openapi/nexus.json index 15e75f93ff..a6dffc6265 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -9742,9 +9742,60 @@ "datum", "type" ] + }, + { + "type": "object", + "properties": { + "datum": { + "$ref": "#/components/schemas/MissingDatum" + }, + "type": { + "type": "string", + "enum": [ + "missing" + ] + } + }, + "required": [ + "datum", + "type" + ] } ] }, + "DatumType": { + "description": "The type of an individual datum of a metric.", + "type": "string", + "enum": [ + "bool", + "i8", + "u8", + "i16", + "u16", + "i32", + "u32", + "i64", + "u64", + "f32", + "f64", + "string", + "bytes", + "cumulative_i64", + "cumulative_u64", + "cumulative_f32", + "cumulative_f64", + "histogram_i8", + "histogram_u8", + "histogram_i16", + "histogram_u16", + "histogram_i32", + "histogram_u32", + "histogram_i64", + "histogram_u64", + "histogram_f32", + "histogram_f64" + ] + }, "DerEncodedKeyPair": { "type": "object", "properties": { @@ -12269,6 +12320,22 @@ "items" ] }, + "MissingDatum": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + }, + "start_time": { + "nullable": true, + "type": "string", + "format": "date-time" + } + }, + "required": [ + "datum_type" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 9951392e98..5e217b27a4 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2898,9 +2898,60 @@ "datum", "type" ] + }, + { + "type": "object", + "properties": { + "datum": { + "$ref": "#/components/schemas/MissingDatum" + }, + "type": { + "type": "string", + "enum": [ + "missing" + ] + } + }, + "required": [ + "datum", + "type" + ] } ] }, + "DatumType": { + "description": "The type of an individual datum of a metric.", + "type": "string", + "enum": [ + "bool", + "i8", + "u8", + "i16", + "u16", + "i32", + "u32", + "i64", + "u64", + "f32", + "f64", + "string", + "bytes", + "cumulative_i64", + "cumulative_u64", + "cumulative_f32", + "cumulative_f64", + "histogram_i8", + "histogram_u8", + "histogram_i16", + "histogram_u16", + "histogram_i32", + "histogram_u32", + "histogram_i64", + "histogram_u64", + "histogram_f32", + "histogram_f64" + ] + }, "DeleteVirtualNetworkInterfaceHost": { "description": "The data needed to identify a virtual IP for which a sled maintains an OPTE virtual-to-physical mapping such that that mapping can be deleted.", "type": "object", @@ -4819,9 +4870,77 @@ "content", "type" ] + }, + { + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + } + }, + "required": [ + "datum_type" + ] + }, + "type": { + "type": "string", + "enum": [ + "missing_datum_requires_start_time" + ] + } + }, + "required": [ + "content", + "type" + ] + }, + { + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + } + }, + "required": [ + "datum_type" + ] + }, + "type": { + "type": "string", + "enum": [ + "missing_datum_cannot_have_start_time" + ] + } + }, + "required": [ + "content", + "type" + ] } ] }, + "MissingDatum": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + }, + "start_time": { + "nullable": true, + "type": "string", + "format": "date-time" + } + }, + "required": [ + "datum_type" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", diff --git a/oximeter/db/notes.txt b/oximeter/db/notes.txt deleted file mode 100644 index 66c3871d46..0000000000 --- a/oximeter/db/notes.txt +++ /dev/null @@ -1,232 +0,0 @@ -Some notes on querying - -For pagination: - -- Timeseries name is enough for paginated list timeseries endpoint. -It's just normal keyset pagination. - -- For the timeseries data, we'll be using limit/offset pagination. We'll -run the query to get the consistent timeseries keys each time. This is -the `ScanParams` part of the `WhichPage`. The `PageSelector` is the offset. - - -Now, how to run more complex queries? A good example is something like, -aggregating the timeseries across all but one field. For example, let's -look at the Nexus HTTP latency data. The fields are: - -- name (String) -- id (Uuid) -- route (String) -- method (String) -- status_code (I64) - -Imagine we wanted to look at the average latency by route, so averaged -across all methods and status codes. 
(Let's ignore name/id) - -We need to group the timeseries keys by route, to find the set of keys -consistent with each different route. ClickHouse provides the `groupArray` -function, which is an aggregate function that collects multiple values -into an array. So we can do: - -``` -SELECT - field_value, - groupArray(timeseries_key) -FROM fields_string -WHERE field_name = 'route' -GROUP BY field_value; - - -┌─field_value───────────────────────────────────────────┬─groupArray(timeseries_key)────────────────┐ -│ /metrics/producers │ [1916712826069192294,6228796576473532827] │ -│ /metrics/collectors │ [1500085842574282480] │ -│ /metrics/collect/e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c │ [15389669872422126367] │ -│ /sled_agents/fb0f7546-4d46-40ca-9d56-cbb810684ca7 │ [1166666993114742619] │ -└───────────────────────────────────────────────────────┴───────────────────────────────────────────┘ -``` - -This gives an array of timeseries keys where the route is each of the values -on the left. - -So at a very high level, we can average all the timeseries values where the keys -are in each of these different arrays. - - -This kinda works. It produces an array of arrays, the counts for each of the -histograms, grouped by the field value. - -``` -SELECT - field_value, - groupArray(counts) -FROM -( - SELECT - field_value, - timeseries_key - FROM fields_string - WHERE field_name = 'route' -) AS f0 -INNER JOIN -( - SELECT * - FROM measurements_histogramf64 -) AS meas USING (timeseries_key) -GROUP BY field_value -``` - -We can extend this `groupArray(bins), groupArray(counts)` to get both. - - -Ok, we're getting somewhere. The aggregation "combinators" modify the behavior of -aggregations, in pretty surprising and powerful ways. For example: - -``` -SELECT - field_value, - sumForEach(counts) -FROM -( - SELECT - field_value, - timeseries_key - FROM fields_string - WHERE field_name = 'route' -) AS f0 -INNER JOIN -( - SELECT * - FROM measurements_histogramf64 -) AS meas USING (timeseries_key) -GROUP BY field_value -``` - -This applies the `-ForEach` combinator to the sum aggregation. This applies the -aggregation to corresponding elements of a sequence (table?) of arrays. We can -do this with any of the aggregations, `avg`, `min`, etc. - - -The `-Resample` combinator also looks interesting. It uses its arguments to create -a set of intervals, and applies the aggregation within each of those intervals. -So sort of a group-by interval or window function. - -Another useful method is `toStartOfInterval`. This takes a timestamp and an interval, -say 5 seconds, or 10 minutes, and returns the interval into which that timestamp -falls. Could be very helpful for aligning/binning data to time intervals. But -it does "round", in that the bins don't start at the first timestamp, but at -the rounded-down interval from that timestamp. - -It's possible to build intervals that start exactly at the first timestamp with: - -``` -SELECT - timestamp, - toStartOfInterval(timestamp, toIntervalMinute(1)) + ( - SELECT toSecond(min(timestamp)) - FROM measurements_histogramf64 - ) -FROM measurements_histogramf64 -``` - -Or some other rounding shenanigans.
- - -Putting lots of this together: - -``` -SELECT - f0.field_name, - f0.field_value, - f1.field_name, - f1.field_value, - minForEach(bins), - avgForEach(counts) -FROM -( - SELECT - field_name, - field_value, - timeseries_key - FROM fields_string - WHERE field_name = 'route' -) AS f0 -INNER JOIN -( - SELECT - field_name, - field_value, - timeseries_key - FROM fields_i64 - WHERE field_name = 'status_code' -) AS f1 ON f0.timeseries_key = f1.timeseries_key -INNER JOIN -( - SELECT * - FROM measurements_histogramf64 -) AS meas ON f1.timeseries_key = meas.timeseries_key -GROUP BY - f0.field_name, - f0.field_value, - f1.field_name, - f1.field_value -``` - -This selects the field name/value, and the bin and average count for each -histogram, grouping by route and status code. - -These inner select statements look similar to the ones we already -implement in `field.as_query`. But in that case we select *, and here we -probably don't want to do that to avoid errors about things not being -in aggregations or group by's. - -This works (or is syntactically valid) for scalars, if we replace the -combinators with their non-combinator version: e.g., `avgForEach` -> `avg`. - - -Other rando thoughts. - -It'd be nice to have the query builder be able to handle all these, but -I'm not sure how worth it that is. For example, I don't even think we need -the timeseries keys in this query. For the fields where we are specifying -a condition, we have subqueries like: - -``` -SELECT * -FROM fields_{TYPE} -WHERE field_name = NAME -AND field_value OP VALUE; -``` - -For ones where we _don't_ care, we just have the first three lines: - -``` -SELECT * -FROM fields_{TYPE} -WHERE field_name = NAME; -``` - -We can join successive entries on timeseries keys. - -For straight SELECT queries, that's pretty much it, like we have currently. -For AGGREGATION queries, we need to - -- Have a group-by for each (field_name, field_value) pair. This is true -even when we're unselective on the field, because we are still taking that -field, and we still need to group the keys accordingly. -- Select the consistent timeseries keys. This is so we can correlate the -results of the aggregation back to the field names/values which we still -get from the key-select query. -- Apply the aggregation to the measurements. For scalars, this is just the -aggregation. For histograms, this is the `-Array` or `-ForEach` combinator -for that aggregation, depending on what we're applying. -- ??? to the timestamps? -- some alignment, grouping, subsampling? It seems -this has to come from the aggregation query, because there's not a useful -default. - -Speaking of defaults, how do these functions behave with missing data? -Or more subtly, what happens if two histograms (say) have the same number -of bins, but the actual bin edges are different? ClickHouse itself doesn't -deal with this AFAICT, which means we'd need to do that in the client. -Ah, but that is unlikely, since we're only aggregating data from the -same timeseries, with the same key. So far anyway. I'm not sure what'll -happen when we start correlating data between timeseries.
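The schema migrations that follow put the answer to that last question into place: every scalar `datum` column becomes Nullable, and a NULL means a missing sample. As a hypothetical, self-contained sketch of why this maps cleanly onto the Rust side (ClickHouse's JSONEachRow format, used by the client tests later in this patch, renders a NULL datum as `null`, which serde deserializes into `Option<T>`; these are not the actual row structs from `oximeter/db/src/model.rs`):

```
// Hypothetical row type; the real ones are generated by the
// declare_measurement_row! macro in oximeter/db/src/model.rs.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct I64MeasurementRow {
    timeseries_name: String,
    timeseries_key: u64,
    timestamp: String, // simplified; the real field is a DateTime
    datum: Option<i64>, // Nullable(Int64) in the schema below
}

fn main() {
    let present = r#"{"timeseries_name":"foo:bar","timeseries_key":101,"timestamp":"2023-12-04 00:00:00.000000000","datum":1}"#;
    let missing = r#"{"timeseries_name":"foo:bar","timeseries_key":101,"timestamp":"2023-12-04 00:00:00.000000000","datum":null}"#;

    let row: I64MeasurementRow = serde_json::from_str(present).unwrap();
    assert_eq!(row.datum, Some(1));

    // A NULL datum deserializes as None, i.e. a missing sample.
    let row: I64MeasurementRow = serde_json::from_str(missing).unwrap();
    assert_eq!(row.datum, None);
}
```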
diff --git a/oximeter/db/schema/replicated/4/up01.sql b/oximeter/db/schema/replicated/4/up01.sql new file mode 100644 index 0000000000..f36745ae2e --- /dev/null +++ b/oximeter/db/schema/replicated/4/up01.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_bool_local MODIFY COLUMN datum Nullable(UInt8) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up02.sql b/oximeter/db/schema/replicated/4/up02.sql new file mode 100644 index 0000000000..0f76398652 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up02.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_bool MODIFY COLUMN datum Nullable(UInt8) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up03.sql b/oximeter/db/schema/replicated/4/up03.sql new file mode 100644 index 0000000000..175b23d71b --- /dev/null +++ b/oximeter/db/schema/replicated/4/up03.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i8_local MODIFY COLUMN datum Nullable(Int8) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up04.sql b/oximeter/db/schema/replicated/4/up04.sql new file mode 100644 index 0000000000..4c8f22d8e6 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up04.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i8 MODIFY COLUMN datum Nullable(Int8) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up05.sql b/oximeter/db/schema/replicated/4/up05.sql new file mode 100644 index 0000000000..82490a81ca --- /dev/null +++ b/oximeter/db/schema/replicated/4/up05.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u8_local MODIFY COLUMN datum Nullable(UInt8) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up06.sql b/oximeter/db/schema/replicated/4/up06.sql new file mode 100644 index 0000000000..c689682127 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up06.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u8 MODIFY COLUMN datum Nullable(UInt8) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up07.sql b/oximeter/db/schema/replicated/4/up07.sql new file mode 100644 index 0000000000..43eb40515b --- /dev/null +++ b/oximeter/db/schema/replicated/4/up07.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i16_local MODIFY COLUMN datum Nullable(Int16) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up08.sql b/oximeter/db/schema/replicated/4/up08.sql new file mode 100644 index 0000000000..1d983a3c83 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up08.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i16 MODIFY COLUMN datum Nullable(Int16) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up09.sql b/oximeter/db/schema/replicated/4/up09.sql new file mode 100644 index 0000000000..e52c2adf5f --- /dev/null +++ b/oximeter/db/schema/replicated/4/up09.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u16_local MODIFY COLUMN datum Nullable(UInt16) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up10.sql b/oximeter/db/schema/replicated/4/up10.sql new file mode 100644 index 0000000000..d8a69fff1a --- /dev/null +++ b/oximeter/db/schema/replicated/4/up10.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u16 MODIFY COLUMN datum Nullable(UInt16) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up11.sql b/oximeter/db/schema/replicated/4/up11.sql new file mode 100644 index 0000000000..b3c2d8de92 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up11.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i32_local MODIFY 
COLUMN datum Nullable(Int32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up12.sql b/oximeter/db/schema/replicated/4/up12.sql new file mode 100644 index 0000000000..65fca2e1b2 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up12.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i32 MODIFY COLUMN datum Nullable(Int32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up13.sql b/oximeter/db/schema/replicated/4/up13.sql new file mode 100644 index 0000000000..df7c520e35 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up13.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u32_local MODIFY COLUMN datum Nullable(UInt32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up14.sql b/oximeter/db/schema/replicated/4/up14.sql new file mode 100644 index 0000000000..a4cb43fb90 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up14.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u32 MODIFY COLUMN datum Nullable(UInt32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up15.sql b/oximeter/db/schema/replicated/4/up15.sql new file mode 100644 index 0000000000..f7583dbdee --- /dev/null +++ b/oximeter/db/schema/replicated/4/up15.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i64_local MODIFY COLUMN datum Nullable(Int64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up16.sql b/oximeter/db/schema/replicated/4/up16.sql new file mode 100644 index 0000000000..b458243d74 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i64 MODIFY COLUMN datum Nullable(Int64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up17.sql b/oximeter/db/schema/replicated/4/up17.sql new file mode 100644 index 0000000000..9229a97704 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up17.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u64_local MODIFY COLUMN datum Nullable(UInt64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up18.sql b/oximeter/db/schema/replicated/4/up18.sql new file mode 100644 index 0000000000..6e2a2a5191 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up18.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u64 MODIFY COLUMN datum Nullable(UInt64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up19.sql b/oximeter/db/schema/replicated/4/up19.sql new file mode 100644 index 0000000000..8f16b5d41e --- /dev/null +++ b/oximeter/db/schema/replicated/4/up19.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_f32_local MODIFY COLUMN datum Nullable(Float32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up20.sql b/oximeter/db/schema/replicated/4/up20.sql new file mode 100644 index 0000000000..9263592740 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up20.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_f32 MODIFY COLUMN datum Nullable(Float32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up21.sql b/oximeter/db/schema/replicated/4/up21.sql new file mode 100644 index 0000000000..72abba6216 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up21.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_f64_local MODIFY COLUMN datum Nullable(Float64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up22.sql b/oximeter/db/schema/replicated/4/up22.sql new file mode 100644 index 0000000000..0d8522bc03 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up22.sql 
@@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_f64 MODIFY COLUMN datum Nullable(Float64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up23.sql b/oximeter/db/schema/replicated/4/up23.sql new file mode 100644 index 0000000000..96b94c2895 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up23.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativei64_local MODIFY COLUMN datum Nullable(Int64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up24.sql b/oximeter/db/schema/replicated/4/up24.sql new file mode 100644 index 0000000000..55df76c25f --- /dev/null +++ b/oximeter/db/schema/replicated/4/up24.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativei64 MODIFY COLUMN datum Nullable(Int64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up25.sql b/oximeter/db/schema/replicated/4/up25.sql new file mode 100644 index 0000000000..fac7369482 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up25.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativeu64_local MODIFY COLUMN datum Nullable(UInt64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up26.sql b/oximeter/db/schema/replicated/4/up26.sql new file mode 100644 index 0000000000..182b2b4704 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up26.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativeu64 MODIFY COLUMN datum Nullable(UInt64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up27.sql b/oximeter/db/schema/replicated/4/up27.sql new file mode 100644 index 0000000000..b482d00f81 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up27.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativef32_local MODIFY COLUMN datum Nullable(Float32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up28.sql b/oximeter/db/schema/replicated/4/up28.sql new file mode 100644 index 0000000000..cefbe56395 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up28.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativef32 MODIFY COLUMN datum Nullable(Float32) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up29.sql b/oximeter/db/schema/replicated/4/up29.sql new file mode 100644 index 0000000000..59e21f353d --- /dev/null +++ b/oximeter/db/schema/replicated/4/up29.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativef64_local MODIFY COLUMN datum Nullable(Float64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up30.sql b/oximeter/db/schema/replicated/4/up30.sql new file mode 100644 index 0000000000..a609e6ad3c --- /dev/null +++ b/oximeter/db/schema/replicated/4/up30.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativef64 MODIFY COLUMN datum Nullable(Float64) \ No newline at end of file diff --git a/oximeter/db/schema/replicated/4/up31.sql b/oximeter/db/schema/replicated/4/up31.sql new file mode 100644 index 0000000000..3726895dd0 --- /dev/null +++ b/oximeter/db/schema/replicated/4/up31.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_string_local MODIFY COLUMN datum Nullable(String); diff --git a/oximeter/db/schema/replicated/4/up32.sql b/oximeter/db/schema/replicated/4/up32.sql new file mode 100644 index 0000000000..5a09705e7e --- /dev/null +++ b/oximeter/db/schema/replicated/4/up32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_string MODIFY COLUMN datum Nullable(String); diff --git a/oximeter/db/schema/replicated/db-init.sql b/oximeter/db/schema/replicated/db-init.sql index 
4429f41364..27df02b709 100644 --- a/oximeter/db/schema/replicated/db-init.sql +++ b/oximeter/db/schema/replicated/db-init.sql @@ -24,7 +24,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_ timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt8 + datum Nullable(UInt8) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -35,7 +35,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt8 + datum Nullable(UInt8) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bool_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -44,7 +44,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8_local ON CLUSTER oximeter_cl timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int8 + datum Nullable(Int8) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -55,7 +55,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int8 + datum Nullable(Int8) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i8_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -64,7 +64,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8_local ON CLUSTER oximeter_cl timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt8 + datum Nullable(UInt8) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -75,7 +75,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt8 + datum Nullable(UInt8) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u8_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -84,7 +84,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int16 + datum Nullable(Int16) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -95,7 +95,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int16 + datum Nullable(Int16) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i16_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -104,7 +104,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt16 + datum Nullable(UInt16) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -115,7 +115,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt16 + datum 
Nullable(UInt16) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u16_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -124,7 +124,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i32_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int32 + datum Nullable(Int32) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -135,7 +135,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int32 + datum Nullable(Int32) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i32_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -144,7 +144,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt32 + datum Nullable(UInt32) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -155,7 +155,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt32 + datum Nullable(UInt32) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u32_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -164,7 +164,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int64 + datum Nullable(Int64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -175,7 +175,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int64 + datum Nullable(Int64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i64_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -184,7 +184,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt64 + datum Nullable(UInt64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -195,7 +195,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt64 + datum Nullable(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u64_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -204,7 +204,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Float32 + datum Nullable(Float32) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -215,7 +215,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp 
DateTime64(9, 'UTC'), - datum Float32 + datum Nullable(Float32) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f32_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -224,7 +224,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64_local ON CLUSTER oximeter_c timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Float64 + datum Nullable(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -235,7 +235,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Float64 + datum Nullable(Float64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f64_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -244,7 +244,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string_local ON CLUSTER oximete timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum String + datum Nullable(String) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_string_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -255,7 +255,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum String + datum Nullable(String) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_string_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -285,7 +285,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64_local ON CLUSTER timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Int64 + datum Nullable(Int64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativei64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -297,7 +297,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ON CLUSTER oximet timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Int64 + datum Nullable(Int64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativei64_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -307,7 +307,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64_local ON CLUSTER timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum UInt64 + datum Nullable(UInt64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativeu64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -319,7 +319,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ON CLUSTER oximet timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum UInt64 + datum Nullable(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativeu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -329,7 +329,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32_local ON CLUSTER timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Float32 + datum Nullable(Float32) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef32_local', '{replica}') ORDER BY (timeseries_name, 
timeseries_key, start_time, timestamp) @@ -341,7 +341,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ON CLUSTER oximet timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Float32 + datum Nullable(Float32) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef32_local', xxHash64(splitByChar(':', timeseries_name)[1])); @@ -351,7 +351,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64_local ON CLUSTER timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Float64 + datum Nullable(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -363,7 +363,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ON CLUSTER oximet timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Float64 + datum Nullable(Float64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef64_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/single-node/4/up01.sql b/oximeter/db/schema/single-node/4/up01.sql new file mode 100644 index 0000000000..ccccc9c5fb --- /dev/null +++ b/oximeter/db/schema/single-node/4/up01.sql @@ -0,0 +1,9 @@ +/* + * To support missing measurements, we are making all scalar datum columns + * Nullable, so that a NULL value (None in Rust) represents a missing datum at + * the provided timestamp. + * + * Note that arrays cannot be made Nullable, so we need to use an empty array as + * the sentinel value implying a missing measurement. + */ +ALTER TABLE oximeter.measurements_bool MODIFY COLUMN datum Nullable(UInt8) diff --git a/oximeter/db/schema/single-node/4/up02.sql b/oximeter/db/schema/single-node/4/up02.sql new file mode 100644 index 0000000000..4c8f22d8e6 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up02.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i8 MODIFY COLUMN datum Nullable(Int8) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up03.sql b/oximeter/db/schema/single-node/4/up03.sql new file mode 100644 index 0000000000..c689682127 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up03.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u8 MODIFY COLUMN datum Nullable(UInt8) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up04.sql b/oximeter/db/schema/single-node/4/up04.sql new file mode 100644 index 0000000000..1d983a3c83 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up04.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i16 MODIFY COLUMN datum Nullable(Int16) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up05.sql b/oximeter/db/schema/single-node/4/up05.sql new file mode 100644 index 0000000000..d8a69fff1a --- /dev/null +++ b/oximeter/db/schema/single-node/4/up05.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u16 MODIFY COLUMN datum Nullable(UInt16) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up06.sql b/oximeter/db/schema/single-node/4/up06.sql new file mode 100644 index 0000000000..65fca2e1b2 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up06.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i32 MODIFY COLUMN datum Nullable(Int32) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up07.sql 
b/oximeter/db/schema/single-node/4/up07.sql new file mode 100644 index 0000000000..a4cb43fb90 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up07.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u32 MODIFY COLUMN datum Nullable(UInt32) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up08.sql b/oximeter/db/schema/single-node/4/up08.sql new file mode 100644 index 0000000000..b458243d74 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up08.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_i64 MODIFY COLUMN datum Nullable(Int64) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up09.sql b/oximeter/db/schema/single-node/4/up09.sql new file mode 100644 index 0000000000..6e2a2a5191 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up09.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_u64 MODIFY COLUMN datum Nullable(UInt64) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up10.sql b/oximeter/db/schema/single-node/4/up10.sql new file mode 100644 index 0000000000..9263592740 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up10.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_f32 MODIFY COLUMN datum Nullable(Float32) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up11.sql b/oximeter/db/schema/single-node/4/up11.sql new file mode 100644 index 0000000000..0d8522bc03 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up11.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_f64 MODIFY COLUMN datum Nullable(Float64) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up12.sql b/oximeter/db/schema/single-node/4/up12.sql new file mode 100644 index 0000000000..55df76c25f --- /dev/null +++ b/oximeter/db/schema/single-node/4/up12.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativei64 MODIFY COLUMN datum Nullable(Int64) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up13.sql b/oximeter/db/schema/single-node/4/up13.sql new file mode 100644 index 0000000000..182b2b4704 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up13.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativeu64 MODIFY COLUMN datum Nullable(UInt64) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up14.sql b/oximeter/db/schema/single-node/4/up14.sql new file mode 100644 index 0000000000..cefbe56395 --- /dev/null +++ b/oximeter/db/schema/single-node/4/up14.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativef32 MODIFY COLUMN datum Nullable(Float32) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up15.sql b/oximeter/db/schema/single-node/4/up15.sql new file mode 100644 index 0000000000..a609e6ad3c --- /dev/null +++ b/oximeter/db/schema/single-node/4/up15.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_cumulativef64 MODIFY COLUMN datum Nullable(Float64) \ No newline at end of file diff --git a/oximeter/db/schema/single-node/4/up16.sql b/oximeter/db/schema/single-node/4/up16.sql new file mode 100644 index 0000000000..5a09705e7e --- /dev/null +++ b/oximeter/db/schema/single-node/4/up16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_string MODIFY COLUMN datum Nullable(String); diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql index ee5e91c4b7..510c1071c8 100644 --- a/oximeter/db/schema/single-node/db-init.sql +++ b/oximeter/db/schema/single-node/db-init.sql @@ -24,7 +24,7 @@ CREATE TABLE IF NOT EXISTS 
oximeter.measurements_bool timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt8 + datum Nullable(UInt8) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -35,7 +35,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int8 + datum Nullable(Int8) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -46,7 +46,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt8 + datum Nullable(UInt8) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -57,7 +57,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int16 + datum Nullable(Int16) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -68,7 +68,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt16 + datum Nullable(UInt16) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -79,7 +79,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int32 + datum Nullable(Int32) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -90,7 +90,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt32 + datum Nullable(UInt32) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -101,7 +101,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Int64 + datum Nullable(Int64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -112,7 +112,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum UInt64 + datum Nullable(UInt64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -123,7 +123,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Float32 + datum Nullable(Float32) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -134,7 +134,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Float64 + datum Nullable(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -145,7 +145,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum String + datum Nullable(String) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -156,6 +156,13 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), + /* + * NOTE: Right now we can't unambiguously record a nullable byte array. + * Arrays cannot be nested in `Nullable()` types, and encoding the array as + * a string isn't palatable for a few reasons. 
+ * See: https://github.com/oxidecomputer/omicron/issues/4551 for more + * details. + */ datum Array(UInt8) ) ENGINE = MergeTree() @@ -168,7 +175,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Int64 + datum Nullable(Int64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -180,7 +187,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum UInt64 + datum Nullable(UInt64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -192,7 +199,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Float32 + datum Nullable(Float32) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -205,7 +212,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), - datum Float64 + datum Nullable(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -217,6 +224,16 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 timeseries_key UInt64, start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), + /* + * NOTE: Array types cannot be Nullable, see + * https://clickhouse.com/docs/en/sql-reference/data-types/nullable + * for more details. + * + * This means we need to use empty arrays to indicate a missing value. This + * is unfortunate, and at this point relies on the fact that an + * `oximeter::Histogram` cannot have zero bins. If that changes, we'll need + * to figure out another way to represent missing samples here. 
+ */ bins Array(Int8), counts Array(UInt64) ) diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index e1ed06554c..c8a7db20cb 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -1190,7 +1190,7 @@ mod tests { use super::*; use crate::query; use crate::query::field_table_name; - use crate::query::measurement_table_name; + use bytes::Bytes; use chrono::Utc; use omicron_test_utils::dev::clickhouse::{ ClickHouseCluster, ClickHouseInstance, @@ -1198,8 +1198,10 @@ mod tests { use omicron_test_utils::dev::test_setup_log; use oximeter::histogram::Histogram; use oximeter::test_util; + use oximeter::types::MissingDatum; use oximeter::Datum; use oximeter::FieldValue; + use oximeter::Measurement; use oximeter::Metric; use oximeter::Target; use std::net::Ipv6Addr; @@ -2957,76 +2959,102 @@ mod tests { Ok(()) } + async fn test_recall_missing_scalar_measurement_impl( + measurement: Measurement, + client: &Client, + ) -> Result<(), Error> { + let start_time = if measurement.datum().is_cumulative() { + Some(Utc::now()) + } else { + None + }; + let missing_datum = Datum::from( + MissingDatum::new(measurement.datum_type(), start_time).unwrap(), + ); + let missing_measurement = Measurement::new(Utc::now(), missing_datum); + test_recall_measurement_impl(missing_measurement, client).await?; + Ok(()) + } + async fn recall_measurement_bool_test( client: &Client, ) -> Result<(), Error> { let datum = Datum::Bool(true); - let as_json = serde_json::Value::from(1_u64); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_i8_test(client: &Client) -> Result<(), Error> { let datum = Datum::I8(1); - let as_json = serde_json::Value::from(1_i8); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_u8_test(client: &Client) -> Result<(), Error> { let datum = Datum::U8(1); - let as_json = serde_json::Value::from(1_u8); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_i16_test(client: &Client) -> Result<(), Error> { let datum = Datum::I16(1); - let as_json = serde_json::Value::from(1_i16); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_u16_test(client: &Client) -> Result<(), Error> { let datum = Datum::U16(1); - let as_json = serde_json::Value::from(1_u16); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_i32_test(client: &Client) -> Result<(), Error> { let 
datum = Datum::I32(1); - let as_json = serde_json::Value::from(1_i32); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_u32_test(client: &Client) -> Result<(), Error> { let datum = Datum::U32(1); - let as_json = serde_json::Value::from(1_u32); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_i64_test(client: &Client) -> Result<(), Error> { let datum = Datum::I64(1); - let as_json = serde_json::Value::from(1_i64); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } async fn recall_measurement_u64_test(client: &Client) -> Result<(), Error> { let datum = Datum::U64(1); - let as_json = serde_json::Value::from(1_u64); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } @@ -3034,9 +3062,9 @@ mod tests { async fn recall_measurement_f32_test(client: &Client) -> Result<(), Error> { const VALUE: f32 = 1.1; let datum = Datum::F32(VALUE); - // NOTE: This is intentionally an f64. - let as_json = serde_json::Value::from(1.1_f64); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } @@ -3044,18 +3072,43 @@ mod tests { async fn recall_measurement_f64_test(client: &Client) -> Result<(), Error> { const VALUE: f64 = 1.1; let datum = Datum::F64(VALUE); - let as_json = serde_json::Value::from(VALUE); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } + async fn recall_measurement_string_test( + client: &Client, + ) -> Result<(), Error> { + let value = String::from("foo"); + let datum = Datum::String(value.clone()); + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) + .await?; + Ok(()) + } + + async fn recall_measurement_bytes_test( + client: &Client, + ) -> Result<(), Error> { + let value = Bytes::from(vec![0, 1, 2]); + let datum = Datum::Bytes(value.clone()); + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + // NOTE: We don't currently support missing byte array samples. 
+ Ok(()) + } + async fn recall_measurement_cumulative_i64_test( client: &Client, ) -> Result<(), Error> { let datum = Datum::CumulativeI64(1.into()); - let as_json = serde_json::Value::from(1_i64); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } @@ -3064,8 +3117,9 @@ mod tests { client: &Client, ) -> Result<(), Error> { let datum = Datum::CumulativeU64(1.into()); - let as_json = serde_json::Value::from(1_u64); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } @@ -3074,8 +3128,9 @@ mod tests { client: &Client, ) -> Result<(), Error> { let datum = Datum::CumulativeF64(1.1.into()); - let as_json = serde_json::Value::from(1.1_f64); - test_recall_measurement_impl::(datum, None, as_json, client) + let measurement = Measurement::new(Utc::now(), datum); + test_recall_measurement_impl(measurement.clone(), client).await?; + test_recall_missing_scalar_measurement_impl(measurement, client) .await?; Ok(()) } @@ -3089,13 +3144,15 @@ mod tests { Datum: From>, serde_json::Value: From, { - let (bins, counts) = hist.to_arrays(); let datum = Datum::from(hist); - let as_json = serde_json::Value::Array( - counts.into_iter().map(Into::into).collect(), + let measurement = Measurement::new(Utc::now(), datum); + let missing_datum = Datum::Missing( + MissingDatum::new(measurement.datum_type(), Some(Utc::now())) + .unwrap(), ); - test_recall_measurement_impl(datum, Some(bins), as_json, client) - .await?; + let missing_measurement = Measurement::new(Utc::now(), missing_datum); + test_recall_measurement_impl(measurement, client).await?; + test_recall_measurement_impl(missing_measurement, client).await?; Ok(()) } @@ -3192,54 +3249,23 @@ mod tests { Ok(()) } - async fn test_recall_measurement_impl + Copy>( - datum: Datum, - maybe_bins: Option>, - json_datum: serde_json::Value, + async fn test_recall_measurement_impl( + measurement: Measurement, client: &Client, ) -> Result<(), Error> { // Insert a record from this datum. const TIMESERIES_NAME: &str = "foo:bar"; const TIMESERIES_KEY: u64 = 101; - let mut inserted_row = serde_json::Map::new(); - inserted_row - .insert("timeseries_name".to_string(), TIMESERIES_NAME.into()); - inserted_row - .insert("timeseries_key".to_string(), TIMESERIES_KEY.into()); - inserted_row.insert( - "timestamp".to_string(), - Utc::now() - .format(crate::DATABASE_TIMESTAMP_FORMAT) - .to_string() - .into(), - ); - - // Insert the start time and possibly bins. 
- if let Some(start_time) = datum.start_time() { - inserted_row.insert( - "start_time".to_string(), - start_time - .format(crate::DATABASE_TIMESTAMP_FORMAT) - .to_string() - .into(), - ); - } - if let Some(bins) = &maybe_bins { - let bins = serde_json::Value::Array( - bins.iter().copied().map(Into::into).collect(), + let (measurement_table, inserted_row) = + crate::model::unroll_measurement_row_impl( + TIMESERIES_NAME.to_string(), + TIMESERIES_KEY, + &measurement, ); - inserted_row.insert("bins".to_string(), bins); - inserted_row.insert("counts".to_string(), json_datum); - } else { - inserted_row.insert("datum".to_string(), json_datum); - } - let inserted_row = serde_json::Value::from(inserted_row); - - let measurement_table = measurement_table_name(datum.datum_type()); - let row = serde_json::to_string(&inserted_row).unwrap(); let insert_sql = format!( - "INSERT INTO oximeter.{measurement_table} FORMAT JSONEachRow {row}", + "INSERT INTO {measurement_table} FORMAT JSONEachRow {inserted_row}", ); + println!("Inserted row: {}", inserted_row); client .execute(insert_sql) .await @@ -3247,21 +3273,22 @@ mod tests { // Select it exactly back out. let select_sql = format!( - "SELECT * FROM oximeter.{} LIMIT 2 FORMAT {};", + "SELECT * FROM {} WHERE timestamp = '{}' FORMAT {};", measurement_table, + measurement.timestamp().format(crate::DATABASE_TIMESTAMP_FORMAT), crate::DATABASE_SELECT_FORMAT, ); let body = client .execute_with_body(select_sql) .await .expect("Failed to select measurement row"); - println!("{}", body); - let actual_row: serde_json::Value = serde_json::from_str(&body) - .expect("Failed to parse measurement row JSON"); - println!("{actual_row:?}"); - println!("{inserted_row:?}"); + let (_, actual_row) = crate::model::parse_measurement_from_row( + &body, + measurement.datum_type(), + ); + println!("Actual row: {actual_row:?}"); assert_eq!( - actual_row, inserted_row, + actual_row, measurement, "Actual and expected measurement rows do not match" ); Ok(()) @@ -3311,6 +3338,10 @@ mod tests { recall_measurement_f64_test(&client).await.unwrap(); + recall_measurement_string_test(&client).await.unwrap(); + + recall_measurement_bytes_test(&client).await.unwrap(); + recall_measurement_cumulative_i64_test(&client).await.unwrap(); recall_measurement_cumulative_u64_test(&client).await.unwrap(); diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 715e025a04..d92e646e89 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -26,6 +26,7 @@ use oximeter::types::Field; use oximeter::types::FieldType; use oximeter::types::FieldValue; use oximeter::types::Measurement; +use oximeter::types::MissingDatum; use oximeter::types::Sample; use serde::Deserialize; use serde::Serialize; @@ -43,7 +44,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 3; +pub const OXIMETER_VERSION: u64 = 4; // Wrapper type to represent a boolean in the database. // @@ -212,6 +213,7 @@ impl From for DbFieldType { } } } + #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)] pub enum DbDatumType { Bool, @@ -402,7 +404,7 @@ macro_rules! declare_measurement_row { timeseries_key: TimeseriesKey, #[serde(with = "serde_timestamp")] timestamp: DateTime, - datum: $datum_type, + datum: Option<$datum_type>, } impl_table_name!{$name, "measurements", $data_type} @@ -433,7 +435,7 @@ macro_rules! 
declare_cumulative_measurement_row { start_time: DateTime, #[serde(with = "serde_timestamp")] timestamp: DateTime, - datum: $datum_type, + datum: Option<$datum_type>, } impl_table_name!{$name, "measurements", $data_type} @@ -456,6 +458,22 @@ struct DbHistogram { pub counts: Vec, } +// We use an empty histogram to indicate a missing sample. +// +// While ClickHouse supports nullable types, the inner type can't be a +// "composite", which includes arrays. I.e., `Nullable(Array(UInt8))` can't be +// used. This is unfortunate, but we are aided by the fact that it's not +// possible to have an `oximeter` histogram that contains zero bins right now. +// This is checked by a test in `oximeter::histogram`. +// +// That means we can currently use an empty array from the database as a +// sentinel for a missing sample. +impl DbHistogram { + fn null() -> Self { + Self { bins: vec![], counts: vec![] } + } +} + impl From<&Histogram> for DbHistogram where T: traits::HistogramSupport, @@ -647,270 +665,571 @@ pub(crate) fn unroll_measurement_row(sample: &Sample) -> (String, String) { let timeseries_name = sample.timeseries_name.clone(); let timeseries_key = crate::timeseries_key(sample); let measurement = &sample.measurement; + unroll_measurement_row_impl(timeseries_name, timeseries_key, measurement) +} + +/// Given a sample's measurement, return a table name and row to insert. +/// +/// This returns a tuple giving the name of the table, and the JSON +/// representation for the serialized row to be inserted into that table, +/// written out as a string. +pub(crate) fn unroll_measurement_row_impl( + timeseries_name: String, + timeseries_key: TimeseriesKey, + measurement: &Measurement, +) -> (String, String) { let timestamp = measurement.timestamp(); let extract_start_time = |measurement: &Measurement| { measurement .start_time() .expect("Cumulative measurements must have a start time") }; + match measurement.datum() { Datum::Bool(inner) => { + let datum = Some(DbBool::from(*inner)); let row = BoolMeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: DbBool::from(*inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::I8(inner) => { + let datum = Some(*inner); let row = I8MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::U8(inner) => { + let datum = Some(*inner); let row = U8MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::I16(inner) => { + let datum = Some(*inner); let row = I16MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::U16(inner) => { + let datum = Some(*inner); let row = U16MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::I32(inner) => { + let datum = Some(*inner); let row = I32MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::U32(inner) => { + let datum = Some(*inner); let row = U32MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::I64(inner) => { + let datum = Some(*inner); let row = I64MeasurementRow { timeseries_name, 
timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::U64(inner) => { + let datum = Some(*inner); let row = U64MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::F32(inner) => { + let datum = Some(*inner); let row = F32MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::F64(inner) => { + let datum = Some(*inner); let row = F64MeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: *inner, + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::String(ref inner) => { + Datum::String(inner) => { + let datum = Some(inner.clone()); let row = StringMeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: inner.clone(), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::Bytes(ref inner) => { + Datum::Bytes(inner) => { + let datum = Some(inner.clone()); let row = BytesMeasurementRow { timeseries_name, timeseries_key, timestamp, - datum: inner.clone(), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::CumulativeI64(inner) => { + let datum = Some(inner.value()); let row = CumulativeI64MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: inner.value(), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::CumulativeU64(inner) => { + let datum = Some(inner.value()); let row = CumulativeU64MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: inner.value(), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::CumulativeF32(inner) => { + let datum = Some(inner.value()); let row = CumulativeF32MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: inner.value(), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } Datum::CumulativeF64(inner) => { + let datum = Some(inner.value()); let row = CumulativeF64MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: inner.value(), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramI8(ref inner) => { + Datum::HistogramI8(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramI8MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramU8(ref inner) => { + Datum::HistogramU8(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramU8MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramI16(ref inner) => { + Datum::HistogramI16(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramI16MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramU16(ref inner) => { + Datum::HistogramU16(inner) => 
{ + let datum = DbHistogram::from(inner); let row = HistogramU16MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramI32(ref inner) => { + Datum::HistogramI32(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramI32MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramU32(ref inner) => { + Datum::HistogramU32(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramU32MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramI64(ref inner) => { + Datum::HistogramI64(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramI64MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramU64(ref inner) => { + Datum::HistogramU64(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramU64MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramF32(ref inner) => { + Datum::HistogramF32(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramF32MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } - Datum::HistogramF64(ref inner) => { + Datum::HistogramF64(inner) => { + let datum = DbHistogram::from(inner); let row = HistogramF64MeasurementRow { timeseries_name, timeseries_key, start_time: extract_start_time(measurement), timestamp, - datum: DbHistogram::from(inner), + datum, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } + Datum::Missing(missing) => { + match missing.datum_type() { + DatumType::Bool => { + let row = BoolMeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::I8 => { + let row = I8MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::U8 => { + let row = U8MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::I16 => { + let row = I16MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::U16 => { + let row = U16MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::I32 => { + let row = I32MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::U32 => 
{ + let row = U32MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::I64 => { + let row = I64MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::U64 => { + let row = U64MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::F32 => { + let row = F32MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::F64 => { + let row = F64MeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::String => { + let row = StringMeasurementRow { + timeseries_name, + timeseries_key, + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::Bytes => { + // See https://github.com/oxidecomputer/omicron/issues/4551. + // + // This is actually unreachable today because the constructor + // for `oximeter::types::MissingDatum` fails when using a + // `DatumType::Bytes`. + unreachable!(); + } + DatumType::CumulativeI64 => { + let row = CumulativeI64MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::CumulativeU64 => { + let row = CumulativeU64MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::CumulativeF32 => { + let row = CumulativeF32MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::CumulativeF64 => { + let row = CumulativeF64MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: None, + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramI8 => { + let row = HistogramI8MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramU8 => { + let row = HistogramU8MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramI16 => { + let row = HistogramI16MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramU16 => { + let row = HistogramU16MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramI32 => { + let row = HistogramI32MeasurementRow { + timeseries_name, +
timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramU32 => { + let row = HistogramU32MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramI64 => { + let row = HistogramI64MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramU64 => { + let row = HistogramU64MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramF32 => { + let row = HistogramF32MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + DatumType::HistogramF64 => { + let row = HistogramF64MeasurementRow { + timeseries_name, + timeseries_key, + start_time: extract_start_time(measurement), + timestamp, + datum: DbHistogram::null(), + }; + (row.table_name(), serde_json::to_string(&row).unwrap()) + } + } + } } } @@ -984,7 +1303,7 @@ struct DbTimeseriesScalarGaugeSample { timeseries_key: TimeseriesKey, #[serde(with = "serde_timestamp")] timestamp: DateTime, - datum: T, + datum: Option, } // A scalar timestamped sample from a cumulative timeseries, as extracted from a query to the @@ -996,7 +1315,7 @@ struct DbTimeseriesScalarCumulativeSample { start_time: DateTime, #[serde(with = "serde_timestamp")] timestamp: DateTime, - datum: T, + datum: Option, } // A histogram timestamped sample from a timeseries, as extracted from a query to the database. 
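To make the effect of these newly `Option`al datum columns concrete, here is a minimal, self-contained sketch of how a missing datum round-trips through ClickHouse's JSONEachRow format: a SQL `NULL` arrives as JSON `null` and deserializes as `None`, which the conversions in the next hunk then map to `Datum::Missing`. This is not the actual oximeter type (`ScalarGaugeRow` is invented for illustration, and the real row structs also carry a timestamp with a custom serializer).

```rust
use serde::Deserialize;

// Stand-in for the `DbTimeseriesScalarGaugeSample` shape above, with the
// datum column declared `Nullable(Int64)` in ClickHouse.
#[derive(Debug, Deserialize)]
struct ScalarGaugeRow {
    timeseries_key: u64,
    datum: Option<i64>,
}

fn main() {
    let present: ScalarGaugeRow =
        serde_json::from_str(r#"{"timeseries_key": 101, "datum": 7}"#)
            .unwrap();
    let missing: ScalarGaugeRow =
        serde_json::from_str(r#"{"timeseries_key": 101, "datum": null}"#)
            .unwrap();
    assert_eq!(present.datum, Some(7));
    // A NULL datum is the database's representation of a missing sample.
    assert!(missing.datum.is_none());
}
```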
@@ -1014,9 +1333,15 @@ struct DbTimeseriesHistogramSample { impl From> for Measurement where Datum: From, + T: FromDbScalar, { fn from(sample: DbTimeseriesScalarGaugeSample) -> Measurement { - let datum = Datum::from(sample.datum); + let datum = match sample.datum { + Some(datum) => Datum::from(datum), + None => { + Datum::Missing(MissingDatum::new(T::DATUM_TYPE, None).unwrap()) + } + }; Measurement::new(sample.timestamp, datum) } } @@ -1024,12 +1349,19 @@ where impl From> for Measurement where Datum: From>, - T: traits::Cumulative, + T: traits::Cumulative + FromDbCumulative, { fn from(sample: DbTimeseriesScalarCumulativeSample) -> Measurement { - let cumulative = - Cumulative::with_start_time(sample.start_time, sample.datum); - let datum = Datum::from(cumulative); + let datum = match sample.datum { + Some(datum) => Datum::from(Cumulative::with_start_time( + sample.start_time, + datum, + )), + None => Datum::Missing( + MissingDatum::new(T::DATUM_TYPE, Some(sample.start_time)) + .unwrap(), + ), + }; Measurement::new(sample.timestamp, datum) } } @@ -1037,26 +1369,157 @@ where impl From> for Measurement where Datum: From>, - T: traits::HistogramSupport, + T: traits::HistogramSupport + FromDbHistogram, { fn from(sample: DbTimeseriesHistogramSample) -> Measurement { - let datum = Datum::from( - Histogram::from_arrays( - sample.start_time, - sample.bins, - sample.counts, + let datum = if sample.bins.is_empty() { + assert!(sample.counts.is_empty()); + Datum::Missing( + MissingDatum::new(T::DATUM_TYPE, Some(sample.start_time)) + .unwrap(), ) - .unwrap(), - ); + } else { + Datum::from( + Histogram::from_arrays( + sample.start_time, + sample.bins, + sample.counts, + ) + .unwrap(), + ) + }; Measurement::new(sample.timestamp, datum) } } +// Helper trait providing the DatumType for a corresponding scalar DB value. +// +// This is used in `parse_timeseries_scalar_gauge_measurement`. 
+trait FromDbScalar { + const DATUM_TYPE: DatumType; +} + +impl FromDbScalar for DbBool { + const DATUM_TYPE: DatumType = DatumType::Bool; +} + +impl FromDbScalar for i8 { + const DATUM_TYPE: DatumType = DatumType::I8; +} + +impl FromDbScalar for u8 { + const DATUM_TYPE: DatumType = DatumType::U8; +} + +impl FromDbScalar for i16 { + const DATUM_TYPE: DatumType = DatumType::I16; +} + +impl FromDbScalar for u16 { + const DATUM_TYPE: DatumType = DatumType::U16; +} + +impl FromDbScalar for i32 { + const DATUM_TYPE: DatumType = DatumType::I32; +} + +impl FromDbScalar for u32 { + const DATUM_TYPE: DatumType = DatumType::U32; +} + +impl FromDbScalar for i64 { + const DATUM_TYPE: DatumType = DatumType::I64; +} + +impl FromDbScalar for u64 { + const DATUM_TYPE: DatumType = DatumType::U64; +} + +impl FromDbScalar for f32 { + const DATUM_TYPE: DatumType = DatumType::F32; +} + +impl FromDbScalar for f64 { + const DATUM_TYPE: DatumType = DatumType::F64; +} + +impl FromDbScalar for String { + const DATUM_TYPE: DatumType = DatumType::String; +} + +impl FromDbScalar for Bytes { + const DATUM_TYPE: DatumType = DatumType::Bytes; +} + +trait FromDbCumulative { + const DATUM_TYPE: DatumType; +} + +impl FromDbCumulative for i64 { + const DATUM_TYPE: DatumType = DatumType::CumulativeI64; +} + +impl FromDbCumulative for u64 { + const DATUM_TYPE: DatumType = DatumType::CumulativeU64; +} + +impl FromDbCumulative for f32 { + const DATUM_TYPE: DatumType = DatumType::CumulativeF32; +} + +impl FromDbCumulative for f64 { + const DATUM_TYPE: DatumType = DatumType::CumulativeF64; +} + +trait FromDbHistogram { + const DATUM_TYPE: DatumType; +} + +impl FromDbHistogram for i8 { + const DATUM_TYPE: DatumType = DatumType::HistogramI8; +} + +impl FromDbHistogram for u8 { + const DATUM_TYPE: DatumType = DatumType::HistogramU8; +} + +impl FromDbHistogram for i16 { + const DATUM_TYPE: DatumType = DatumType::HistogramI16; +} + +impl FromDbHistogram for u16 { + const DATUM_TYPE: DatumType = DatumType::HistogramU16; +} + +impl FromDbHistogram for i32 { + const DATUM_TYPE: DatumType = DatumType::HistogramI32; +} + +impl FromDbHistogram for u32 { + const DATUM_TYPE: DatumType = DatumType::HistogramU32; +} + +impl FromDbHistogram for i64 { + const DATUM_TYPE: DatumType = DatumType::HistogramI64; +} + +impl FromDbHistogram for u64 { + const DATUM_TYPE: DatumType = DatumType::HistogramU64; +} + +impl FromDbHistogram for f32 { + const DATUM_TYPE: DatumType = DatumType::HistogramF32; +} + +impl FromDbHistogram for f64 { + const DATUM_TYPE: DatumType = DatumType::HistogramF64; +} + fn parse_timeseries_scalar_gauge_measurement<'a, T>( line: &'a str, ) -> (TimeseriesKey, Measurement) where - T: Deserialize<'a> + Into, + T: Deserialize<'a> + Into + FromDbScalar, Datum: From, { let sample = @@ -1068,7 +1531,7 @@ fn parse_timeseries_scalar_cumulative_measurement<'a, T>( line: &'a str, ) -> (TimeseriesKey, Measurement) where - T: Deserialize<'a> + traits::Cumulative, + T: Deserialize<'a> + traits::Cumulative + FromDbCumulative, Datum: From>, { let sample = @@ -1081,7 +1544,7 @@ fn parse_timeseries_histogram_measurement( line: &str, ) -> (TimeseriesKey, Measurement) where - T: Into + traits::HistogramSupport, + T: Into + traits::HistogramSupport + FromDbHistogram, Datum: From>, { let sample = @@ -1459,6 +1922,27 @@ mod tests { } } + // Test that we correctly unroll a row when the measurement is missing its + // datum. 
+ #[test] + fn test_unroll_missing_measurement_row() { + let sample = test_util::make_sample(); + let missing_sample = test_util::make_missing_sample(); + let (table_name, row) = unroll_measurement_row(&sample); + let (missing_table_name, missing_row) = + unroll_measurement_row(&missing_sample); + let row = serde_json::from_str::(&row).unwrap(); + let missing_row = + serde_json::from_str::(&missing_row).unwrap(); + println!("{row:#?}"); + println!("{missing_row:#?}"); + assert_eq!(table_name, missing_table_name); + assert_eq!(row.timeseries_name, missing_row.timeseries_name); + assert_eq!(row.timeseries_key, missing_row.timeseries_key); + assert!(row.datum.is_some()); + assert!(missing_row.datum.is_none()); + } + #[test] fn test_unroll_measurement_row() { let sample = test_util::make_hist_sample(); @@ -1473,14 +1957,13 @@ mod tests { ) .unwrap(); let measurement = &sample.measurement; - if let Datum::HistogramF64(hist) = measurement.datum() { - assert_eq!( - hist, &unpacked_hist, - "Unpacking histogram from database representation failed" - ); - } else { + let Datum::HistogramF64(hist) = measurement.datum() else { panic!("Expected a histogram measurement"); - } + }; + assert_eq!( + hist, &unpacked_hist, + "Unpacking histogram from database representation failed" + ); assert_eq!(unpacked.start_time, measurement.start_time().unwrap()); } @@ -1582,12 +2065,11 @@ mod tests { assert_eq!(key, 12); assert_eq!(measurement.start_time().unwrap(), start_time); assert_eq!(measurement.timestamp(), timestamp); - if let Datum::HistogramI64(hist) = measurement.datum() { - assert_eq!(hist.n_bins(), 3); - assert_eq!(hist.n_samples(), 2); - } else { + let Datum::HistogramI64(hist) = measurement.datum() else { panic!("Expected a histogram sample"); - } + }; + assert_eq!(hist.n_bins(), 3); + assert_eq!(hist.n_samples(), 2); } #[test] @@ -1624,4 +2106,14 @@ mod tests { "Histogram reconstructed from paired arrays is not correct" ); } + #[test] + fn test_parse_bytes_measurement() { + let s = r#"{"timeseries_key": 101, "timestamp": "2023-11-21 18:25:21.963714255", "datum": "\u0001\u0002\u0003"}"#; + let (_, meas) = parse_timeseries_scalar_gauge_measurement::(&s); + println!("{meas:?}"); + let Datum::Bytes(b) = meas.datum() else { + unreachable!(); + }; + assert_eq!(b.to_vec(), vec![1, 2, 3]); + } } diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index 8a69494d5a..0cb2d8cace 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -21,4 +21,5 @@ omicron-workspace-hack.workspace = true [dev-dependencies] approx.workspace = true rstest.workspace = true +serde_json.workspace = true trybuild.workspace = true diff --git a/oximeter/oximeter/src/histogram.rs b/oximeter/oximeter/src/histogram.rs index c399384ffa..aaf9297ca4 100644 --- a/oximeter/oximeter/src/histogram.rs +++ b/oximeter/oximeter/src/histogram.rs @@ -1353,13 +1353,10 @@ mod tests { } #[test] - fn test_foo() { - let bins: Vec = 10u16.bins(1, 3, 30.try_into().unwrap()).unwrap(); - println!("{bins:?}"); - dbg!(bins.len()); - let hist = Histogram::new(&bins).unwrap(); - for bin in hist.iter() { - println!("{}", bin.range); - } + fn test_empty_bins_not_supported() { + assert!(matches!( + Histogram::::new(&[]).unwrap_err(), + HistogramError::EmptyBins + )); } } diff --git a/oximeter/oximeter/src/test_util.rs b/oximeter/oximeter/src/test_util.rs index f3750d6d83..a9778d03bc 100644 --- a/oximeter/oximeter/src/test_util.rs +++ b/oximeter/oximeter/src/test_util.rs @@ -48,19 +48,27 @@ pub struct TestHistogram { pub datum: 
Histogram, } +const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); + pub fn make_sample() -> Sample { let target = TestTarget::default(); - let metric = TestMetric { id: Uuid::new_v4(), good: true, datum: 1 }; + let metric = TestMetric { id: ID, good: true, datum: 1 }; Sample::new(&target, &metric).unwrap() } +pub fn make_missing_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new_missing(&target, &metric).unwrap() +} + pub fn make_hist_sample() -> Sample { let target = TestTarget::default(); let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); hist.sample(1.0).unwrap(); hist.sample(2.0).unwrap(); hist.sample(6.0).unwrap(); - let metric = TestHistogram { id: Uuid::new_v4(), good: true, datum: hist }; + let metric = TestHistogram { id: ID, good: true, datum: hist }; Sample::new(&target, &metric).unwrap() } diff --git a/oximeter/oximeter/src/traits.rs b/oximeter/oximeter/src/traits.rs index 096abb8023..0934d231e3 100644 --- a/oximeter/oximeter/src/traits.rs +++ b/oximeter/oximeter/src/traits.rs @@ -30,8 +30,15 @@ use std::ops::AddAssign; /// definition can be thought of as a schema, and an instance of that struct as identifying an /// individual target. /// -/// Target fields may have one of a set of supported types: `bool`, `i64`, `String`, `IpAddr`, or -/// `Uuid`. Any number of fields greater than zero is supported. +/// Target fields may have one of a set of supported types: +/// +/// - `bool` +/// - any fixed-width integer, e.g., `u8` or `i64` +/// - `String` +/// - `IpAddr` +/// - `Uuid` +/// +/// Any number of fields greater than zero is supported. /// /// Examples /// -------- @@ -105,9 +112,28 @@ pub trait Target { /// One field of the struct is special, describing the actual measured data that the metric /// represents. This should be a field named `datum`, or another field (with any name you choose) /// annotated with the `#[datum]` attribute. This field represents the underlying data for the -/// metric, and must be one of the supported types, implementing the [`Datum`] trait. This can -/// be any of: `i64`, `f64`, `bool`, `String`, or `Bytes` for gauges, and `Cumulative<T>` or -/// `Histogram<T>` for cumulative metrics, where `T` is `i64` or `f64`. +/// metric, and must be one of the supported types, implementing the [`Datum`] trait. +/// +/// For gauge types, this can be any of: +/// +/// - `bool` +/// - a fixed-width integer, e.g. `u8` or `i64` +/// - `f32` or `f64` +/// - `String` +/// - `Bytes` +/// +/// Cumulative types can be any of `Cumulative<T>`, where `T` is +/// +/// - `i64` +/// - `u64` +/// - `f32` +/// - `f64` +/// +/// Histogram types can be any `Histogram<T>`, where `T` is: +/// +/// - a fixed-width integer, e.g. `u8` or `i64` +/// - `f32` +/// - `f64` /// /// The value of the metric's data is _measured_ by using the `measure()` method, which returns a /// [`Measurement`]. This describes a timestamped data point for the metric.
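As a quick illustration of the datum-field rule documented in the hunks above, here is a hedged sketch patterned on the `TestTarget`/`TestMetric` types in `test_util.rs`. The names (`Fan`, `SpeedRpm`, `TotalRotations`) are invented for the example, and it assumes the `Target`/`Metric` derive macros exported by this crate.

```rust
use oximeter::types::Cumulative;
use oximeter::{Metric, Target};
use uuid::Uuid;

// A target: its fields identify what is being measured.
#[derive(Debug, Clone, Target)]
pub struct Fan {
    pub id: Uuid,
}

// A gauge metric: the `datum` field is one of the scalar types listed
// above (here a fixed-width integer).
#[derive(Debug, Clone, Metric)]
pub struct SpeedRpm {
    pub slot: i64,
    pub datum: u32,
}

// A cumulative metric: `datum` is a `Cumulative<T>` with `T = u64`.
#[derive(Debug, Clone, Metric)]
pub struct TotalRotations {
    pub slot: i64,
    pub datum: Cumulative<u64>,
}
```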
diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index 325974781e..23dbe2be6b 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -369,6 +369,7 @@ pub enum Datum { HistogramU64(histogram::Histogram), HistogramF32(histogram::Histogram), HistogramF64(histogram::Histogram), + Missing(MissingDatum), } impl Datum { @@ -402,6 +403,7 @@ impl Datum { Datum::HistogramU64(_) => DatumType::HistogramU64, Datum::HistogramF32(_) => DatumType::HistogramF32, Datum::HistogramF64(_) => DatumType::HistogramF64, + Datum::Missing(ref inner) => inner.datum_type(), } } @@ -440,6 +442,7 @@ impl Datum { Datum::HistogramU64(ref inner) => Some(inner.start_time()), Datum::HistogramF32(ref inner) => Some(inner.start_time()), Datum::HistogramF64(ref inner) => Some(inner.start_time()), + Datum::Missing(ref inner) => inner.start_time(), } } } @@ -495,6 +498,60 @@ impl From<&str> for Datum { } } +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct MissingDatum { + datum_type: DatumType, + start_time: Option>, +} + +impl MissingDatum { + pub fn datum_type(&self) -> DatumType { + self.datum_type + } + + pub fn start_time(&self) -> Option> { + self.start_time + } + + pub fn new( + datum_type: DatumType, + start_time: Option>, + ) -> Result { + // See https://github.com/oxidecomputer/omicron/issues/4551. + if datum_type == DatumType::Bytes { + return Err(MetricsError::DatumError(String::from( + "Missing samples from byte array types are not supported", + ))); + } + if datum_type.is_cumulative() && start_time.is_none() { + return Err(MetricsError::MissingDatumRequiresStartTime { + datum_type, + }); + } + if !datum_type.is_cumulative() && start_time.is_some() { + return Err(MetricsError::MissingDatumCannotHaveStartTime { + datum_type, + }); + } + Ok(Self { datum_type, start_time }) + } +} + +impl From for Datum { + fn from(d: MissingDatum) -> Datum { + Datum::Missing(d) + } +} + +impl From<&M> for MissingDatum { + fn from(metric: &M) -> Self { + MissingDatum { + datum_type: metric.datum_type(), + start_time: metric.start_time(), + } + } +} + /// A `Measurement` is a timestamped datum from a single metric #[derive(Clone, Debug, PartialEq, JsonSchema, Serialize, Deserialize)] pub struct Measurement { @@ -516,6 +573,11 @@ impl Measurement { Self { timestamp, datum: datum.into() } } + /// Return true if this measurement represents a missing datum. + pub fn is_missing(&self) -> bool { + matches!(self.datum, Datum::Missing(_)) + } + /// Return the datum for this measurement pub fn datum(&self) -> &Datum { &self.datum @@ -561,6 +623,12 @@ pub enum MetricsError { /// A field name is duplicated between the target and metric. #[error("Field '{name}' is duplicated between the target and metric")] DuplicateFieldName { name: String }, + + #[error("Missing datum of type {datum_type} requires a start time")] + MissingDatumRequiresStartTime { datum_type: DatumType }, + + #[error("Missing datum of type {datum_type} cannot have a start time")] + MissingDatumCannotHaveStartTime { datum_type: DatumType }, } impl From for omicron_common::api::external::Error { @@ -734,6 +802,29 @@ impl Sample { }) } + /// Construct a new missing sample, recorded at the time of the supplied + /// timestamp. 
+ pub fn new_missing_with_timestamp( + timestamp: DateTime, + target: &T, + metric: &M, + ) -> Result + where + T: traits::Target, + M: traits::Metric, + { + let target_fields = FieldSet::from_target(target); + let metric_fields = FieldSet::from_metric(metric); + Self::verify_field_names(&target_fields, &metric_fields)?; + let datum = Datum::Missing(MissingDatum::from(metric)); + Ok(Self { + timeseries_name: crate::timeseries_name(target, metric), + target: target_fields, + metric: metric_fields, + measurement: Measurement { timestamp, datum }, + }) + } + /// Construct a new sample, created at the time the function is called. /// /// This materializes the data from the target and metric, and stores that information along @@ -746,6 +837,18 @@ impl Sample { Self::new_with_timestamp(Utc::now(), target, metric) } + /// Construct a new sample with a missing measurement. + pub fn new_missing( + target: &T, + metric: &M, + ) -> Result + where + T: traits::Target, + M: traits::Metric, + { + Self::new_missing_with_timestamp(Utc::now(), target, metric) + } + /// Return the fields for this sample. /// /// This returns the target fields and metric fields, chained, although there is no distinction @@ -951,7 +1054,7 @@ mod tests { fn test_measurement() { let measurement = Measurement::new(chrono::Utc::now(), 0i64); assert_eq!(measurement.datum_type(), DatumType::I64); - assert_eq!(measurement.start_time(), None); + assert!(measurement.start_time().is_none()); let datum = Cumulative::new(0i64); let measurement = Measurement::new(chrono::Utc::now(), datum); From 5c90213e1b07b64cb02b9f5d2858c09f2832371a Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 4 Dec 2023 16:09:20 -0800 Subject: [PATCH 047/186] [nexus] remove views::SledProvisionState::Unknown (#4608) This doesn't quite work as expected: * As an input type, if an unknown `SledProvisionState` is specified, we immediately produce an error as soon as we enter the HTTP entrypoint. There's no functional difference between that and producing an error at deserialization time. * As an output type, progenitor doesn't support `#[serde(other)]` so the unknown type doesn't work. 
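A small sketch of the input-side point, using a stand-in for `views::SledProvisionState` (the real type also derives `Serialize`, `JsonSchema`, and so on): with the catch-all variant gone, an unrecognized state is rejected by serde itself, so there is nothing left for the HTTP entrypoint to re-validate.

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
enum SledProvisionState {
    Provisionable,
    NonProvisionable,
}

fn main() {
    let ok: SledProvisionState =
        serde_json::from_str(r#""non_provisionable""#).unwrap();
    assert_eq!(ok, SledProvisionState::NonProvisionable);
    // Previously this would have deserialized to `Unknown` and been
    // rejected later; now it fails at deserialization time.
    assert!(
        serde_json::from_str::<SledProvisionState>(r#""mystery""#).is_err()
    );
}
```

With the `TryFrom` below becoming an infallible `From`, the entrypoint also no longer needs its own bad-request mapping.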
--- Cargo.lock | 1 - nexus/db-model/src/sled_provision_state.rs | 13 ++++--------- nexus/src/external_api/http_entrypoints.rs | 5 +---- nexus/types/Cargo.toml | 1 - nexus/types/src/external_api/views.rs | 7 ------- openapi/nexus.json | 7 ------- 6 files changed, 5 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 28d3015025..9671cb34bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4179,7 +4179,6 @@ dependencies = [ "schemars", "serde", "serde_json", - "serde_with", "steno", "strum", "uuid", diff --git a/nexus/db-model/src/sled_provision_state.rs b/nexus/db-model/src/sled_provision_state.rs index 6cf81b9c70..b2b1ee39dc 100644 --- a/nexus/db-model/src/sled_provision_state.rs +++ b/nexus/db-model/src/sled_provision_state.rs @@ -34,19 +34,14 @@ impl From for views::SledProvisionState { } } -impl TryFrom for SledProvisionState { - type Error = UnknownSledProvisionState; - - fn try_from(state: views::SledProvisionState) -> Result { +impl From for SledProvisionState { + fn from(state: views::SledProvisionState) -> Self { match state { views::SledProvisionState::Provisionable => { - Ok(SledProvisionState::Provisionable) + SledProvisionState::Provisionable } views::SledProvisionState::NonProvisionable => { - Ok(SledProvisionState::NonProvisionable) - } - views::SledProvisionState::Unknown => { - Err(UnknownSledProvisionState) + SledProvisionState::NonProvisionable } } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index f1302f4a73..ef8d73afab 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -4504,10 +4504,7 @@ async fn sled_set_provision_state( let opctx = crate::context::op_context_for_external_api(&rqctx).await?; // Convert the external `SledProvisionState` into our internal data model. - let new_state = - db::model::SledProvisionState::try_from(provision_state).map_err( - |error| HttpError::for_bad_request(None, format!("{error}")), - )?; + let new_state = db::model::SledProvisionState::from(provision_state); let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 8cbbd8626c..9cb94a8484 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -14,7 +14,6 @@ parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } serde.workspace = true serde_json.workspace = true -serde_with.workspace = true steno.workspace = true strum.workspace = true uuid.workspace = true diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 6d02623f34..4006b18bcc 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -17,7 +17,6 @@ use omicron_common::api::external::{ }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use serde_with::rust::deserialize_ignore_any; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::net::IpAddr; @@ -311,12 +310,6 @@ pub enum SledProvisionState { /// resources will continue to be on this sled unless manually migrated /// off. NonProvisionable, - - /// This is a state that isn't known yet. - /// - /// This is defined to avoid API breakage. 
- #[serde(other, deserialize_with = "deserialize_ignore_any")] - Unknown, } /// An operator's view of an instance running on a given sled diff --git a/openapi/nexus.json b/openapi/nexus.json index a6dffc6265..1c7e25d004 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -13259,13 +13259,6 @@ "enum": [ "non_provisionable" ] - }, - { - "description": "This is a state that isn't known yet.\n\nThis is defined to avoid API breakage.", - "type": "string", - "enum": [ - "unknown" - ] } ] }, From 301edd733bbbfb3117ad4ae30c483fb6e97ea0c7 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 5 Dec 2023 05:14:35 +0000 Subject: [PATCH 048/186] Update taiki-e/install-action digest to d211c4b (#4612) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`21526ba` -> `d211c4b`](https://togithub.com/taiki-e/install-action/compare/21526ba...d211c4b) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index b5a7504066..70b57a450a 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@21526ba3bb38834e625c185ae4f2f942f1fb8f27 # v2 + uses: taiki-e/install-action@d211c4be5a95cbcd52a0870dda7d63a107a58368 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 0a781f9957f5da1ef4b2ca7dce1d7bf13be5b739 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:15:21 -0800 Subject: [PATCH 049/186] Update Rust crate derive-where to 1.2.6 (#4613) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9671cb34bb..8f855f5219 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1585,9 +1585,9 @@ dependencies = [ [[package]] name = "derive-where" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146398d62142a0f35248a608f17edf0dde57338354966d6e41d0eb2d16980ccb" +checksum = "48d9b1fc2a6d7e19c89e706a3769e31ee862ac7a4c810c7c0ff3910e1a42a4ce" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 533e710dc2..931d885e7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -181,7 +181,7 @@ ddm-admin-client = { path = "clients/ddm-admin-client" } db-macros = { path = "nexus/db-macros" } debug-ignore = "1.0.5" derive_more = "0.99.17" 
-derive-where = "1.2.5" +derive-where = "1.2.6" diesel = { version = "2.1.4", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" } dns-server = { path = "dns-server" } From 7dca6fc9edf52fbf6846deec9346217c495c235d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 5 Dec 2023 06:59:04 -0800 Subject: [PATCH 050/186] [wicketd] Add refresh-config subcommand instead of using curl (#4606) This allows us to add some time for refreshing a wicketd that hasn't fully started (and therefore isn't reachable on its dropshot server yet). Fixes https://github.com/oxidecomputer/omicron/issues/4604. Testing this is a little awkward because config refresh is only support on illumos via SMF, so I tested this by hand on `madrid`: 0. Disable wicketd (`svcadm disable wicketd`) 1. Install the new wicketd binary and `manifest.xml` in the switch zone 2. Import the new manifest.xml (`svccfg import /var/svc/manifest/site/wicketd/manifest.xml`) 3. (I expected to need to restore the config properties that sled-agent had set, but they persisted) 4. Enable wicketd and immediately try to refresh it (`svcadm enable wicketd && svcadm refresh wicketd`) In the wicketd logs, we see that the first refresh attempt failed because it fired before the dropshot server was up: ``` [ Dec 4 18:34:22 Executing start method ("ctrun -l child -o noorphan,regent /opt/oxide/wicketd/bin/wicketd run /var/svc/manifest/site/wicketd/config.toml --address [::1]:12226 --artifact-address [fdb0:a840:2504:355::2]:12227 --mgs-address [::1]:12225 --nex us-proxy-address [::]:12229 --baseboard-file /opt/oxide/baseboard.json --read-smf-config &"). ] [ Dec 4 18:34:22 Method "start" exited with status 0. ] [ Dec 4 18:34:22 Rereading configuration. ] note: configured to log to "/dev/stdout" [ Dec 4 18:34:22 Executing refresh method ("/opt/oxide/wicketd/bin/wicketd refresh-config /var/svc/manifest/site/wicketd/config.toml --address [::1]:12226"). 
] note: configured to log to "/dev/stdout" 18:34:22.330Z WARN wicketd: failed to refresh wicketd config (attempt 1 of 3); will retry after 5s err = Communication Error: error sending request for url (http://[::1]:12226/reload-config): error trying to connect: tcp connect error: Connection refused (os error 146)\nCaused by:\n -> error sending request for url (http://[::1]:12226/reload-config) : error trying to connect: tcp connect error: Connection refused (os error 146)\n -> error trying to connect: tcp connect error: Connection refused (os error 146)\n -> tcp connect error: Connection refused (os error 146)\n -> Connection refused (os error 146) 18:34:22.396Z INFO wicketd (dropshot (wicketd)): listening file = /home/john/.cargo/git/checkouts/dropshot-a4a923d29dccc492/ff87a01/dropshot/src/server.rs:195 local_addr = [::1]:12226 ``` 10 seconds later, we see the successful connection, POST, and exit of the SMF `refresh`: ``` 18:34:32.332Z INFO wicketd (dropshot (wicketd)): accepted connection file = /home/john/.cargo/git/checkouts/dropshot-a4a923d29dccc492/ff87a01/dropshot/src/server.rs:769 local_addr = [::1]:12226 remote_addr = [::1]:32976 18:34:32.388Z INFO wicketd (dropshot (wicketd)): request completed file = /home/john/.cargo/git/checkouts/dropshot-a4a923d29dccc492/ff87a01/dropshot/src/server.rs:853 latency_us = 30475 local_addr = [::1]:12226 method = POST remote_addr = [::1]:32976 req_id = e0c2034a-0a99-45c1-a651-57249ca258f0 response_code = 204 uri = /reload-config [ Dec 4 18:34:32 Method "refresh" exited with status 0. ] ``` --- smf/wicketd/manifest.xml | 2 +- wicketd/Cargo.toml | 2 +- wicketd/src/bin/wicketd.rs | 41 ++++++++++++++++++++--- wicketd/src/lib.rs | 68 +++++++++++++++++++++++++++++++++++--- 4 files changed, 102 insertions(+), 11 deletions(-) diff --git a/smf/wicketd/manifest.xml b/smf/wicketd/manifest.xml index 778a7abf2d..b45ff1544b 100644 --- a/smf/wicketd/manifest.xml +++ b/smf/wicketd/manifest.xml @@ -32,7 +32,7 @@ it expected https). --> diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index 1360c28b19..97550342d0 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -58,6 +58,7 @@ sled-hardware.workspace = true tufaceous-lib.workspace = true update-engine.workspace = true wicket-common.workspace = true +wicketd-client.workspace = true omicron-workspace-hack.workspace = true [[bin]] @@ -83,4 +84,3 @@ tar.workspace = true tokio = { workspace = true, features = ["test-util"] } tufaceous.workspace = true wicket.workspace = true -wicketd-client.workspace = true diff --git a/wicketd/src/bin/wicketd.rs b/wicketd/src/bin/wicketd.rs index 887ac496e0..24fa802c79 100644 --- a/wicketd/src/bin/wicketd.rs +++ b/wicketd/src/bin/wicketd.rs @@ -5,6 +5,7 @@ //! 
Executable for wicketd: technician port based management service use anyhow::{anyhow, Context}; +use camino::Utf8PathBuf; use clap::Parser; use omicron_common::{ address::Ipv6Subnet, @@ -24,9 +25,9 @@ enum Args { /// Start a wicketd server Run { #[clap(name = "CONFIG_FILE_PATH", action)] - config_file_path: PathBuf, + config_file_path: Utf8PathBuf, - /// The address for the technician port + /// The address on which the main wicketd dropshot server should listen #[clap(short, long, action)] address: SocketAddrV6, @@ -57,6 +58,19 @@ enum Args { #[clap(long, action, conflicts_with("read_smf_config"))] rack_subnet: Option, }, + + /// Instruct a running wicketd server to refresh its config + /// + /// Mechanically, this hits a specific endpoint served by wicketd's dropshot + /// server + RefreshConfig { + #[clap(name = "CONFIG_FILE_PATH", action)] + config_file_path: Utf8PathBuf, + + /// The address of the server to refresh + #[clap(short, long, action)] + address: SocketAddrV6, + }, } #[tokio::main] @@ -104,9 +118,7 @@ async fn do_run() -> Result<(), CmdError> { }; let config = Config::from_file(&config_file_path) - .with_context(|| { - format!("failed to parse {}", config_file_path.display()) - }) + .with_context(|| format!("failed to parse {config_file_path}")) .map_err(CmdError::Failure)?; let rack_subnet = match rack_subnet { @@ -140,5 +152,24 @@ async fn do_run() -> Result<(), CmdError> { .await .map_err(|err| CmdError::Failure(anyhow!(err))) } + Args::RefreshConfig { config_file_path, address } => { + let config = Config::from_file(&config_file_path) + .with_context(|| format!("failed to parse {config_file_path}")) + .map_err(CmdError::Failure)?; + + let log = config + .log + .to_logger("wicketd") + .context("failed to initialize logger") + .map_err(CmdError::Failure)?; + + // When run via `svcadm refresh ...`, we need to respect the special + // [SMF exit codes](https://illumos.org/man/7/smf_method). Returning + // an error from main exits with code 1 (from libc::EXIT_FAILURE), + // which does not collide with any special SMF codes. 
+ Server::refresh_config(log, address) + .await + .map_err(CmdError::Failure) + } } } diff --git a/wicketd/src/lib.rs b/wicketd/src/lib.rs index ada1902654..32188d77de 100644 --- a/wicketd/src/lib.rs +++ b/wicketd/src/lib.rs @@ -16,11 +16,12 @@ mod preflight_check; mod rss_config; mod update_tracker; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use artifacts::{WicketdArtifactServer, WicketdArtifactStore}; use bootstrap_addrs::BootstrapPeers; pub use config::Config; pub(crate) use context::ServerContext; +use display_error_chain::DisplayErrorChain; use dropshot::{ConfigDropshot, HandlerTaskMode, HttpServer}; pub use installinator_progress::{IprUpdateTracker, RunningUpdateState}; use internal_dns::resolver::Resolver; @@ -34,6 +35,7 @@ use preflight_check::PreflightCheckerHandler; use sled_hardware::Baseboard; use slog::{debug, error, o, Drain}; use std::sync::{Mutex, OnceLock}; +use std::time::Duration; use std::{ net::{SocketAddr, SocketAddrV6}, sync::Arc, @@ -70,7 +72,6 @@ pub struct SmfConfigValues { impl SmfConfigValues { #[cfg(target_os = "illumos")] pub fn read_current() -> Result { - use anyhow::Context; use illumos_utils::scf::ScfHandle; const CONFIG_PG: &str = "config"; @@ -259,11 +260,70 @@ impl Server { res = self.artifact_server => { match res { Ok(()) => Err("artifact server exited unexpectedly".to_owned()), - // The artifact server returns an anyhow::Error, which has a `Debug` impl that - // prints out the chain of errors. + // The artifact server returns an anyhow::Error, which has a + // `Debug` impl that prints out the chain of errors. Err(err) => Err(format!("running artifact server: {err:?}")), } } } } + + /// Instruct a running server at the specified address to reload its config + /// parameters + pub async fn refresh_config( + log: slog::Logger, + address: SocketAddrV6, + ) -> Result<()> { + // It's possible we're being told to refresh a server's config before + // it's ready to receive such a request, so we'll give it a healthy + // amount of time before we give up: we'll set a client timeout and also + // retry a few times. See + // https://github.com/oxidecomputer/omicron/issues/4604. + const CLIENT_TIMEOUT: Duration = Duration::from_secs(5); + const SLEEP_BETWEEN_RETRIES: Duration = Duration::from_secs(10); + const NUM_RETRIES: usize = 3; + + let client = reqwest::Client::builder() + .connect_timeout(CLIENT_TIMEOUT) + .timeout(CLIENT_TIMEOUT) + .build() + .context("failed to construct reqwest Client")?; + + let client = wicketd_client::Client::new_with_client( + &format!("http://{address}"), + client, + log, + ); + let log = client.inner(); + + let mut attempt = 0; + loop { + attempt += 1; + + // If we succeed, we're done. + let Err(err) = client.post_reload_config().await else { + return Ok(()); + }; + + // If we failed, either warn+sleep and try again, or fail. 
+ if attempt < NUM_RETRIES { + slog::warn!( + log, + "failed to refresh wicketd config \ + (attempt {attempt} of {NUM_RETRIES}); \ + will retry after {CLIENT_TIMEOUT:?}"; + "err" => %DisplayErrorChain::new(&err), + ); + tokio::time::sleep(SLEEP_BETWEEN_RETRIES).await; + } else { + slog::error!( + log, + "failed to refresh wicketd config \ + (tried {NUM_RETRIES} times)"; + "err" => %DisplayErrorChain::new(&err), + ); + return Err(err).context("failed to contact wicketd"); + } + } + } } From 4b426d260a06e436f2b398e99a602568d8026e6e Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 5 Dec 2023 12:55:52 -0800 Subject: [PATCH 051/186] [ci] update nextest to 0.9.64 (#4609) There have been a bunch of releases and I don't want us to fall too far behind in CI. (Maybe this should be handled by Renovate :) ) --- .config/nextest.toml | 2 +- .github/buildomat/build-and-test.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index ef296d7ef8..4f927d2396 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,7 @@ # # The required version should be bumped up if we need new features, performance # improvements or bugfixes that are present in newer versions of nextest. -nextest-version = { required = "0.9.59", recommended = "0.9.59" } +nextest-version = { required = "0.9.59", recommended = "0.9.64" } experimental = ["setup-scripts"] diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 6fda8bb8d7..34f81bab68 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -7,7 +7,7 @@ set -o xtrace # NOTE: This version should be in sync with the recommended version in # .config/nextest.toml. (Maybe build an automated way to pull the recommended # version in the future.) -NEXTEST_VERSION='0.9.59' +NEXTEST_VERSION='0.9.64' cargo --version rustc --version From d525deef3f12a08fc836bb10e8ad2ce7661fa66b Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Tue, 5 Dec 2023 13:52:06 -0800 Subject: [PATCH 052/186] Set version to 5.0.0 (#4566) --- .github/buildomat/jobs/package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 0605ab6883..350ab37233 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -37,7 +37,7 @@ rustc --version # trampoline global zone images. # COMMIT=$(git rev-parse HEAD) -VERSION="1.0.4-0.ci+git${COMMIT:0:11}" +VERSION="5.0.0-0.ci+git${COMMIT:0:11}" echo "$VERSION" >/work/version.txt ptime -m ./tools/install_builder_prerequisites.sh -yp From b3d641a2dd3bb3f3dd68a413ddd670e3dbcc0b5c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 5 Dec 2023 20:48:26 -0800 Subject: [PATCH 053/186] [nexus] Make most transactions automatically retry (#4487) Integrates automatic transaction retry into Nexus for most transactions. Additionally, this PR provides a "RetryHelper" object to help standardize how transaction retry is performed. Currently, after a short randomized wait (up to an upper bound), we retry unconditionally, emitting each attempt to Oximeter for further analysis. - [x] Depends on https://github.com/oxidecomputer/async-bb8-diesel/pull/58 - [x] As noted in https://github.com/oxidecomputer/async-bb8-diesel/pull/58, this will require customizing CRDB session variables to work correctly. 
--- Cargo.lock | 3 +- Cargo.toml | 2 +- nexus/db-model/src/sled.rs | 2 +- nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/db/collection_attach.rs | 27 +- .../src/db/collection_detach_many.rs | 26 +- .../src/db/datastore/address_lot.rs | 103 +- nexus/db-queries/src/db/datastore/bgp.rs | 424 +++--- .../src/db/datastore/db_metadata.rs | 47 +- .../src/db/datastore/device_auth.rs | 53 +- nexus/db-queries/src/db/datastore/dns.rs | 80 +- .../src/db/datastore/external_ip.rs | 24 +- .../src/db/datastore/identity_provider.rs | 68 +- nexus/db-queries/src/db/datastore/mod.rs | 26 + .../src/db/datastore/network_interface.rs | 129 +- nexus/db-queries/src/db/datastore/project.rs | 172 ++- nexus/db-queries/src/db/datastore/rack.rs | 514 ++++--- nexus/db-queries/src/db/datastore/region.rs | 165 +-- nexus/db-queries/src/db/datastore/service.rs | 40 +- nexus/db-queries/src/db/datastore/silo.rs | 151 +- .../db-queries/src/db/datastore/silo_group.rs | 54 +- nexus/db-queries/src/db/datastore/sled.rs | 245 ++-- nexus/db-queries/src/db/datastore/snapshot.rs | 180 ++- .../src/db/datastore/switch_interface.rs | 166 ++- .../src/db/datastore/switch_port.rs | 1294 +++++++++-------- nexus/db-queries/src/db/datastore/update.rs | 56 +- .../virtual_provisioning_collection.rs | 32 +- nexus/db-queries/src/db/datastore/volume.rs | 685 +++++---- nexus/db-queries/src/db/datastore/vpc.rs | 75 +- nexus/db-queries/src/db/error.rs | 70 +- nexus/db-queries/src/db/mod.rs | 4 +- .../src/db/queries/network_interface.rs | 15 +- nexus/db-queries/src/lib.rs | 1 + nexus/db-queries/src/transaction_retry.rs | 341 +++++ nexus/src/app/background/dns_config.rs | 11 +- nexus/src/app/background/init.rs | 8 +- nexus/src/app/sagas/disk_create.rs | 32 +- nexus/src/app/sagas/instance_create.rs | 83 +- nexus/src/app/sagas/project_create.rs | 15 +- nexus/src/app/sagas/test_helpers.rs | 25 +- 40 files changed, 3003 insertions(+), 2446 deletions(-) create mode 100644 nexus/db-queries/src/transaction_retry.rs diff --git a/Cargo.lock b/Cargo.lock index 8f855f5219..13b3d6c74e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -250,7 +250,7 @@ checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" [[package]] name = "async-bb8-diesel" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=1446f7e0c1f05f33a0581abd51fa873c7652ab61#1446f7e0c1f05f33a0581abd51fa873c7652ab61" +source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=ed7ab5ef0513ba303d33efd41d3e9e381169d59b#ed7ab5ef0513ba303d33efd41d3e9e381169d59b" dependencies = [ "async-trait", "bb8", @@ -4047,6 +4047,7 @@ dependencies = [ "pem 1.1.1", "petgraph", "pq-sys", + "rand 0.8.5", "rcgen", "ref-cast", "regex", diff --git a/Cargo.toml b/Cargo.toml index 931d885e7a..c88502bb1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -144,7 +144,7 @@ api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" assert_cmd = "2.0.12" -async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "1446f7e0c1f05f33a0581abd51fa873c7652ab61" } +async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } async-trait = "0.1.74" atomicwrites = "0.4.2" authz-macros = { path = "nexus/authz-macros" } diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index
0f6d1b911e..85a6b3139c 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -232,7 +232,7 @@ impl SledUpdate { } /// A set of constraints that can be placed on operations that select a sled. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct SledReservationConstraints { must_select_from: Vec<Uuid>, } diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 94e3a56abf..9d8afd1fea 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -32,6 +32,7 @@ oso.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. pq-sys = "*" +rand.workspace = true ref-cast.workspace = true samael.workspace = true serde.workspace = true diff --git a/nexus/db-queries/src/db/collection_attach.rs b/nexus/db-queries/src/db/collection_attach.rs index ea4d9d5beb..fccc1aa324 100644 --- a/nexus/db-queries/src/db/collection_attach.rs +++ b/nexus/db-queries/src/db/collection_attach.rs @@ -563,12 +563,9 @@ where #[cfg(test)] mod test { use super::*; - use crate::db::{ - self, error::TransactionError, identity::Resource as IdentityResource, - }; + use crate::db::{self, identity::Resource as IdentityResource}; use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - ConnectionManager, + AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, }; use chrono::Utc; use db_macros::Resource; @@ -999,22 +996,12 @@ mod test { .set(resource::dsl::collection_id.eq(collection_id)), ); - type TxnError = - TransactionError<AttachError<Resource, Collection, DieselError>>; - let result = conn - .transaction_async(|conn| async move { - attach_query.attach_and_get_result_async(&conn).await.map_err( - |e| match e { - AttachError::DatabaseError(e) => TxnError::from(e), - e => TxnError::CustomError(e), - }, - ) - }) - .await; - // "attach_and_get_result" should return the "attached" resource. - let (returned_collection, returned_resource) = - result.expect("Attach should have worked"); + let (returned_collection, returned_resource) = attach_query + .attach_and_get_result_async(&conn) + .await + .expect("Attach should have worked"); + assert_eq!( returned_resource.collection_id.expect("Expected a collection ID"), collection_id diff --git a/nexus/db-queries/src/db/collection_detach_many.rs b/nexus/db-queries/src/db/collection_detach_many.rs index 8df6d4aed4..986cfb70b7 100644 --- a/nexus/db-queries/src/db/collection_detach_many.rs +++ b/nexus/db-queries/src/db/collection_detach_many.rs @@ -479,12 +479,9 @@ mod test { use super::*; use crate::db::collection_attach::DatastoreAttachTarget; - use crate::db::{ - self, error::TransactionError, identity::Resource as IdentityResource, - }; + use crate::db::{self, identity::Resource as IdentityResource}; use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - ConnectionManager, + AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, }; use chrono::Utc; use db_macros::Resource; @@ -919,21 +916,12 @@ mod test { .set(resource::dsl::collection_id.eq(Option::<Uuid>::None)), ); - type TxnError = - TransactionError<DetachManyError<Collection, DieselError>>; - let result = conn - .transaction_async(|conn| async move { - detach_query.detach_and_get_result_async(&conn).await.map_err( - |e| match e { - DetachManyError::DatabaseError(e) => TxnError::from(e), - e => TxnError::CustomError(e), - }, - ) - }) - .await; - // "detach_and_get_result" should return the "detached" resource.
- let returned_collection = result.expect("Detach should have worked"); + let returned_collection = detach_query + .detach_and_get_result_async(&conn) + .await + .expect("Detach should have worked"); + // The returned values should be the latest value in the DB. assert_eq!( returned_collection, diff --git a/nexus/db-queries/src/db/datastore/address_lot.rs b/nexus/db-queries/src/db/datastore/address_lot.rs index 97dfb59eba..5c2ffbf1d0 100644 --- a/nexus/db-queries/src/db/datastore/address_lot.rs +++ b/nexus/db-queries/src/db/datastore/address_lot.rs @@ -13,9 +13,9 @@ use crate::db::error::TransactionError; use crate::db::model::Name; use crate::db::model::{AddressLot, AddressLotBlock, AddressLotReservedBlock}; use crate::db::pagination::paginated; -use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl, Connection}; +use crate::transaction_retry::OptionalError; +use async_bb8_diesel::{AsyncRunQueryDsl, Connection}; use chrono::Utc; -use diesel::result::Error as DieselError; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use diesel_dtrace::DTraceConnection; use ipnetwork::IpNetwork; @@ -45,11 +45,12 @@ impl DataStore { use db::schema::address_lot::dsl as lot_dsl; use db::schema::address_lot_block::dsl as block_dsl; - self.pool_connection_authorized(opctx) - .await? - // TODO https://github.com/oxidecomputer/omicron/issues/2811 - // Audit external networking database transaction usage - .transaction_async(|conn| async move { + let conn = self.pool_connection_authorized(opctx).await?; + + // TODO https://github.com/oxidecomputer/omicron/issues/2811 + // Audit external networking database transaction usage + self.transaction_retry_wrapper("address_lot_create") + .transaction(&conn, |conn| async move { let lot = AddressLot::new(&params.identity, params.kind.into()); let db_lot: AddressLot = diesel::insert_into(lot_dsl::address_lot) @@ -81,15 +82,14 @@ impl DataStore { Ok(AddressLotCreateResult { lot: db_lot, blocks: db_blocks }) }) .await - .map_err(|e| match e { - DieselError::DatabaseError(_, _) => public_error_from_diesel( + .map_err(|e| { + public_error_from_diesel( e, ErrorHandler::Conflict( ResourceType::AddressLot, &params.identity.name.as_str(), ), - ), - _ => public_error_from_diesel(e, ErrorHandler::Server), + ) }) } @@ -113,47 +113,54 @@ impl DataStore { LotInUse, } - type TxnError = TransactionError<AddressLotDeleteError>; + let err = OptionalError::new(); // TODO https://github.com/oxidecomputer/omicron/issues/2811 // Audit external networking database transaction usage - conn.transaction_async(|conn| async move { - let rsvd: Vec<AddressLotReservedBlock> = - rsvd_block_dsl::address_lot_rsvd_block - .filter(rsvd_block_dsl::address_lot_id.eq(id)) - .select(AddressLotReservedBlock::as_select()) - .limit(1) - .load_async(&conn) - .await?; - - if !rsvd.is_empty() { - Err(TxnError::CustomError(AddressLotDeleteError::LotInUse))?; - } + self.transaction_retry_wrapper("address_lot_delete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let rsvd: Vec<AddressLotReservedBlock> = + rsvd_block_dsl::address_lot_rsvd_block + .filter(rsvd_block_dsl::address_lot_id.eq(id)) + .select(AddressLotReservedBlock::as_select()) + .limit(1) + .load_async(&conn) + .await?; + + if !rsvd.is_empty() { + return Err(err.bail(AddressLotDeleteError::LotInUse)); + } + + let now = Utc::now(); + diesel::update(lot_dsl::address_lot) + .filter(lot_dsl::time_deleted.is_null()) + .filter(lot_dsl::id.eq(id)) + .set(lot_dsl::time_deleted.eq(now)) + .execute_async(&conn) + .await?; - let now = Utc::now(); - diesel::update(lot_dsl::address_lot) - .filter(lot_dsl::time_deleted.is_null()) -
.filter(lot_dsl::id.eq(id)) - .set(lot_dsl::time_deleted.eq(now)) - .execute_async(&conn) - .await?; - - diesel::delete(block_dsl::address_lot_block) - .filter(block_dsl::address_lot_id.eq(id)) - .execute_async(&conn) - .await?; - - Ok(()) - }) - .await - .map_err(|e| match e { - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - TxnError::CustomError(AddressLotDeleteError::LotInUse) => { - Error::invalid_request("lot is in use") - } - }) + diesel::delete(block_dsl::address_lot_block) + .filter(block_dsl::address_lot_id.eq(id)) + .execute_async(&conn) + .await?; + + Ok(()) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + AddressLotDeleteError::LotInUse => { + Error::invalid_request("lot is in use") + } + } + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn address_lot_list( diff --git a/nexus/db-queries/src/db/datastore/bgp.rs b/nexus/db-queries/src/db/datastore/bgp.rs index ff314a2564..28075b0ded 100644 --- a/nexus/db-queries/src/db/datastore/bgp.rs +++ b/nexus/db-queries/src/db/datastore/bgp.rs @@ -3,11 +3,11 @@ use crate::context::OpContext; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::model::Name; use crate::db::model::{BgpAnnounceSet, BgpAnnouncement, BgpConfig}; use crate::db::pagination::paginated; -use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; +use crate::transaction_retry::OptionalError; +use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use nexus_types::external_api::params; @@ -30,33 +30,33 @@ impl DataStore { use db::schema::{ bgp_announce_set, bgp_announce_set::dsl as announce_set_dsl, }; - let pool = self.pool_connection_authorized(opctx).await?; - - pool.transaction_async(|conn| async move { - let id: Uuid = match &config.bgp_announce_set_id { - NameOrId::Name(name) => { - announce_set_dsl::bgp_announce_set - .filter(bgp_announce_set::time_deleted.is_null()) - .filter(bgp_announce_set::name.eq(name.to_string())) - .select(bgp_announce_set::id) - .limit(1) - .first_async::<Uuid>(&conn) - .await? - } - NameOrId::Id(id) => *id, - }; - - let config = BgpConfig::from_config_create(config, id); - - let result = diesel::insert_into(dsl::bgp_config) - .values(config.clone()) - .returning(BgpConfig::as_returning()) - .get_result_async(&conn) - .await?; - Ok(result) - }) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("bgp_config_set") + .transaction(&conn, |conn| async move { + let id: Uuid = match &config.bgp_announce_set_id { + NameOrId::Name(name) => { + announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::name.eq(name.to_string())) + .select(bgp_announce_set::id) + .limit(1) + .first_async::<Uuid>(&conn) + .await?
+ } + NameOrId::Id(id) => *id, + }; + + let config = BgpConfig::from_config_create(config, id); + + let result = diesel::insert_into(dsl::bgp_config) + .values(config.clone()) + .returning(BgpConfig::as_returning()) + .get_result_async(&conn) + .await?; + Ok(result) + }) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } pub async fn bgp_config_delete( @@ -74,54 +74,59 @@ impl DataStore { enum BgpConfigDeleteError { ConfigInUse, } - type TxnError = TransactionError<BgpConfigDeleteError>; - - let pool = self.pool_connection_authorized(opctx).await?; - pool.transaction_async(|conn| async move { - let name_or_id = sel.name_or_id.clone(); - - let id: Uuid = match name_or_id { - NameOrId::Id(id) => id, - NameOrId::Name(name) => { - bgp_config_dsl::bgp_config - .filter(bgp_config::name.eq(name.to_string())) - .select(bgp_config::id) - .limit(1) - .first_async::<Uuid>(&conn) - .await? - } - }; - - let count = - sps_bgp_peer_config_dsl::switch_port_settings_bgp_peer_config - .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) - .count() - .execute_async(&conn) - .await?; - - if count > 0 { - return Err(TxnError::CustomError( - BgpConfigDeleteError::ConfigInUse, - )); - } - diesel::update(bgp_config_dsl::bgp_config) - .filter(bgp_config_dsl::id.eq(id)) - .set(bgp_config_dsl::time_deleted.eq(Utc::now())) - .execute_async(&conn) - .await?; + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("bgp_config_delete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let name_or_id = sel.name_or_id.clone(); + + let id: Uuid = match name_or_id { + NameOrId::Id(id) => id, + NameOrId::Name(name) => { + bgp_config_dsl::bgp_config + .filter(bgp_config::name.eq(name.to_string())) + .select(bgp_config::id) + .limit(1) + .first_async::<Uuid>(&conn) + .await?
+ } + }; + + let count = + sps_bgp_peer_config_dsl::switch_port_settings_bgp_peer_config + .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) + .count() + .execute_async(&conn) + .await?; + + if count > 0 { + return Err(err.bail(BgpConfigDeleteError::ConfigInUse)); + } + + diesel::update(bgp_config_dsl::bgp_config) + .filter(bgp_config_dsl::id.eq(id)) + .set(bgp_config_dsl::time_deleted.eq(Utc::now())) + .execute_async(&conn) + .await?; - Ok(()) - }) - .await - .map_err(|e| match e { - TxnError::CustomError(BgpConfigDeleteError::ConfigInUse) => { - Error::invalid_request("BGP config in use") - } - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - }) + Ok(()) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + BgpConfigDeleteError::ConfigInUse => { + Error::invalid_request("BGP config in use") + } + } + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn bgp_config_get( @@ -131,7 +136,7 @@ impl DataStore { ) -> LookupResult<BgpConfig> { use db::schema::bgp_config; use db::schema::bgp_config::dsl; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; let name_or_id = name_or_id.clone(); @@ -140,14 +145,14 @@ impl DataStore { .filter(bgp_config::name.eq(name.to_string())) .select(BgpConfig::as_select()) .limit(1) - .first_async::<BgpConfig>(&*pool) + .first_async::<BgpConfig>(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), NameOrId::Id(id) => dsl::bgp_config .filter(bgp_config::id.eq(id)) .select(BgpConfig::as_select()) .limit(1) - .first_async::<BgpConfig>(&*pool) + .first_async::<BgpConfig>(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), }?; @@ -162,7 +167,7 @@ impl DataStore { ) -> ListResultVec<BgpConfig> { use db::schema::bgp_config::dsl; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; match pagparams { PaginatedBy::Id(pagparams) => { @@ -176,7 +181,7 @@ impl DataStore { } .filter(dsl::time_deleted.is_null()) .select(BgpConfig::as_select()) - .load_async(&*pool) + .load_async(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } @@ -195,47 +200,64 @@ impl DataStore { enum BgpAnnounceListError { AnnounceSetNotFound(Name), } - type TxnError = TransactionError<BgpAnnounceListError>; - - let pool = self.pool_connection_authorized(opctx).await?; - pool.transaction_async(|conn| async move { - let name_or_id = sel.name_or_id.clone(); - - let announce_id: Uuid = match name_or_id { - NameOrId::Id(id) => id, - NameOrId::Name(name) => announce_set_dsl::bgp_announce_set - .filter(bgp_announce_set::time_deleted.is_null()) - .filter(bgp_announce_set::name.eq(name.to_string())) - .select(bgp_announce_set::id) - .limit(1) - .first_async::<Uuid>(&conn) - .await - .map_err(|_| { - TxnError::CustomError( - BgpAnnounceListError::AnnounceSetNotFound( - Name::from(name.clone()), - ), - ) - })?, - }; - - let result = announce_dsl::bgp_announcement - .filter(announce_dsl::announce_set_id.eq(announce_id)) - .select(BgpAnnouncement::as_select()) - .load_async(&conn) - .await?; - - Ok(result) - }) - .await - .map_err(|e| match e { - TxnError::CustomError( - BgpAnnounceListError::AnnounceSetNotFound(name), - ) => Error::not_found_by_name(ResourceType::BgpAnnounceSet, &name), - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - }) + + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; +
self.transaction_retry_wrapper("bgp_announce_list") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let name_or_id = sel.name_or_id.clone(); + + let announce_id: Uuid = match name_or_id { + NameOrId::Id(id) => id, + NameOrId::Name(name) => { + announce_set_dsl::bgp_announce_set + .filter( + bgp_announce_set::time_deleted.is_null(), + ) + .filter( + bgp_announce_set::name.eq(name.to_string()), + ) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + err.bail_retryable_or( + e, + BgpAnnounceListError::AnnounceSetNotFound( + Name::from(name.clone()), + ) + ) + })? + } + }; + + let result = announce_dsl::bgp_announcement + .filter(announce_dsl::announce_set_id.eq(announce_id)) + .select(BgpAnnouncement::as_select()) + .load_async(&conn) + .await?; + + Ok(result) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + BgpAnnounceListError::AnnounceSetNotFound(name) => { + Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + ) + } + } + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn bgp_create_announce_set( @@ -246,37 +268,39 @@ impl DataStore { use db::schema::bgp_announce_set::dsl as announce_set_dsl; use db::schema::bgp_announcement::dsl as bgp_announcement_dsl; - let pool = self.pool_connection_authorized(opctx).await?; - pool.transaction_async(|conn| async move { - let bas: BgpAnnounceSet = announce.clone().into(); + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("bgp_create_announce_set") + .transaction(&conn, |conn| async move { + let bas: BgpAnnounceSet = announce.clone().into(); - let db_as: BgpAnnounceSet = - diesel::insert_into(announce_set_dsl::bgp_announce_set) - .values(bas.clone()) - .returning(BgpAnnounceSet::as_returning()) - .get_result_async::(&conn) - .await?; - - let mut db_annoucements = Vec::new(); - for a in &announce.announcement { - let an = BgpAnnouncement { - announce_set_id: db_as.id(), - address_lot_block_id: bas.identity.id, - network: a.network.into(), - }; - let an = - diesel::insert_into(bgp_announcement_dsl::bgp_announcement) - .values(an.clone()) - .returning(BgpAnnouncement::as_returning()) - .get_result_async::(&conn) + let db_as: BgpAnnounceSet = + diesel::insert_into(announce_set_dsl::bgp_announce_set) + .values(bas.clone()) + .returning(BgpAnnounceSet::as_returning()) + .get_result_async::(&conn) .await?; - db_annoucements.push(an); - } - Ok((db_as, db_annoucements)) - }) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + let mut db_annoucements = Vec::new(); + for a in &announce.announcement { + let an = BgpAnnouncement { + announce_set_id: db_as.id(), + address_lot_block_id: bas.identity.id, + network: a.network.into(), + }; + let an = diesel::insert_into( + bgp_announcement_dsl::bgp_announcement, + ) + .values(an.clone()) + .returning(BgpAnnouncement::as_returning()) + .get_result_async::(&conn) + .await?; + db_annoucements.push(an); + } + + Ok((db_as, db_annoucements)) + }) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } pub async fn bgp_delete_announce_set( @@ -295,57 +319,67 @@ impl DataStore { enum BgpAnnounceSetDeleteError { AnnounceSetInUse, } - type TxnError = TransactionError; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; let name_or_id = sel.name_or_id.clone(); - pool.transaction_async(|conn| async move { - let id: Uuid = 
match name_or_id { - NameOrId::Name(name) => { - announce_set_dsl::bgp_announce_set - .filter(bgp_announce_set::name.eq(name.to_string())) - .select(bgp_announce_set::id) - .limit(1) - .first_async::<Uuid>(&conn) - .await? - } - NameOrId::Id(id) => id, - }; - - let count = bgp_config_dsl::bgp_config - .filter(bgp_config::bgp_announce_set_id.eq(id)) - .count() - .execute_async(&conn) - .await?; - - if count > 0 { - return Err(TxnError::CustomError( - BgpAnnounceSetDeleteError::AnnounceSetInUse, - )); - } + let err = OptionalError::new(); + self.transaction_retry_wrapper("bgp_delete_announce_set") + .transaction(&conn, |conn| { + let err = err.clone(); + let name_or_id = name_or_id.clone(); + async move { + let id: Uuid = match name_or_id { + NameOrId::Name(name) => { + announce_set_dsl::bgp_announce_set + .filter( + bgp_announce_set::name.eq(name.to_string()), + ) + .select(bgp_announce_set::id) + .limit(1) + .first_async::<Uuid>(&conn) + .await? + } + NameOrId::Id(id) => id, + }; + + let count = bgp_config_dsl::bgp_config + .filter(bgp_config::bgp_announce_set_id.eq(id)) + .count() + .execute_async(&conn) + .await?; - diesel::update(announce_set_dsl::bgp_announce_set) - .filter(announce_set_dsl::id.eq(id)) - .set(announce_set_dsl::time_deleted.eq(Utc::now())) - .execute_async(&conn) - .await?; + if count > 0 { + return Err(err.bail( + BgpAnnounceSetDeleteError::AnnounceSetInUse, + )); + } - diesel::delete(bgp_announcement_dsl::bgp_announcement) - .filter(bgp_announcement_dsl::announce_set_id.eq(id)) - .execute_async(&conn) - .await?; + diesel::update(announce_set_dsl::bgp_announce_set) + .filter(announce_set_dsl::id.eq(id)) + .set(announce_set_dsl::time_deleted.eq(Utc::now())) + .execute_async(&conn) + .await?; - Ok(()) - }) - .await - .map_err(|e| match e { - TxnError::CustomError( - BgpAnnounceSetDeleteError::AnnounceSetInUse, - ) => Error::invalid_request("BGP announce set in use"), - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - }) + diesel::delete(bgp_announcement_dsl::bgp_announcement) + .filter(bgp_announcement_dsl::announce_set_id.eq(id)) + .execute_async(&conn) + .await?; + + Ok(()) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + BgpAnnounceSetDeleteError::AnnounceSetInUse => { + Error::invalid_request("BGP announce set in use") + } + } + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } } diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 39a70f7a1e..e579bb8476 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -8,10 +8,7 @@ use super::DataStore; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::TransactionError; -use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, -}; +use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use camino::{Utf8Path, Utf8PathBuf}; use chrono::Utc; use diesel::prelude::*; @@ -415,30 +412,30 @@ impl DataStore { target: &SemverVersion, sql: &String, ) -> Result<(), Error> { - let result = self.pool_connection_unauthorized().await?.transaction_async(|conn| async move { - if target.to_string() != EARLIEST_SUPPORTED_VERSION { - let validate_version_query = format!("SELECT CAST(\ - IF(\ - (\ - SELECT version = '{current}' and target_version = '{target}'\ - FROM omicron.public.db_metadata WHERE singleton = true\ - ),\ - 'true',\ -
'Invalid starting version for schema change'\ - ) AS BOOL\ - );"); - conn.batch_execute_async(&validate_version_query).await?; - } - conn.batch_execute_async(&sql).await?; - Ok::<_, TransactionError<()>>(()) - }).await; + let conn = self.pool_connection_unauthorized().await?; + + let result = self.transaction_retry_wrapper("apply_schema_update") + .transaction(&conn, |conn| async move { + if target.to_string() != EARLIEST_SUPPORTED_VERSION { + let validate_version_query = format!("SELECT CAST(\ + IF(\ + (\ + SELECT version = '{current}' and target_version = '{target}'\ + FROM omicron.public.db_metadata WHERE singleton = true\ + ),\ + 'true',\ + 'Invalid starting version for schema change'\ + ) AS BOOL\ + );"); + conn.batch_execute_async(&validate_version_query).await?; + } + conn.batch_execute_async(&sql).await?; + Ok(()) + }).await; match result { Ok(()) => Ok(()), - Err(TransactionError::CustomError(())) => panic!("No custom error"), - Err(TransactionError::Database(e)) => { - Err(public_error_from_diesel(e, ErrorHandler::Server)) - } + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), } } diff --git a/nexus/db-queries/src/db/datastore/device_auth.rs b/nexus/db-queries/src/db/datastore/device_auth.rs index e1facb43f6..8d8e09744c 100644 --- a/nexus/db-queries/src/db/datastore/device_auth.rs +++ b/nexus/db-queries/src/db/datastore/device_auth.rs @@ -10,10 +10,8 @@ use crate::context::OpContext; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::model::DeviceAccessToken; use crate::db::model::DeviceAuthRequest; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use omicron_common::api::external::CreateResult; @@ -75,35 +73,40 @@ impl DataStore { RequestNotFound, TooManyRequests, } - type TxnError = TransactionError; - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { - match delete_request.execute_async(&conn).await? { - 0 => { - Err(TxnError::CustomError(TokenGrantError::RequestNotFound)) + let err = crate::transaction_retry::OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper("device_access_token_create") + .transaction(&conn, |conn| { + let err = err.clone(); + let insert_token = insert_token.clone(); + let delete_request = delete_request.clone(); + async move { + match delete_request.execute_async(&conn).await? 
{ + 0 => Err(err.bail(TokenGrantError::RequestNotFound)), + 1 => Ok(insert_token.get_result_async(&conn).await?), + _ => Err(err.bail(TokenGrantError::TooManyRequests)), } } }) .await - .map_err(|e| match e { - TxnError::CustomError(TokenGrantError::RequestNotFound) => { - Error::ObjectNotFound { - type_name: ResourceType::DeviceAuthRequest, - lookup_type: LookupType::ByCompositeId( - authz_request.id(), - ), + .map_err(|e| { + if let Some(err) = err.take() { + match err { + TokenGrantError::RequestNotFound => { + Error::ObjectNotFound { + type_name: ResourceType::DeviceAuthRequest, + lookup_type: LookupType::ByCompositeId( + authz_request.id(), + ), + } + } + TokenGrantError::TooManyRequests => { + Error::internal_error("unexpectedly found multiple device auth requests for the same user code") + } } - } - TxnError::CustomError(TokenGrantError::TooManyRequests) => { - Error::internal_error("unexpectedly found multiple device auth requests for the same user code") - } - TxnError::Database(e) => { + } else { public_error_from_diesel(e, ErrorHandler::Server) } }) diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index f7ad97593e..cfd25d6a4f 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -67,7 +67,9 @@ impl DataStore { dns_group: DnsGroup, ) -> ListResultVec<DnsZone> { let conn = self.pool_connection_authorized(opctx).await?; - self.dns_zones_list_all_on_connection(opctx, &conn, dns_group).await + Ok(self + .dns_zones_list_all_on_connection(opctx, &conn, dns_group) + .await?) } /// Variant of [`Self::dns_zones_list_all`] which may be called from a @@ -77,7 +79,7 @@ impl DataStore { opctx: &OpContext, conn: &async_bb8_diesel::Connection<DbConnection>, dns_group: DnsGroup, - ) -> ListResultVec<DnsZone> { + ) -> Result<Vec<DnsZone>, TransactionError<Error>> { use db::schema::dns_zone::dsl; const LIMIT: usize = 5; @@ -88,8 +90,7 @@ impl DataStore { .limit(i64::try_from(LIMIT).unwrap()) .select(DnsZone::as_select()) .load_async(conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .await?; bail_unless!( list.len() < LIMIT, @@ -106,12 +107,14 @@ impl DataStore { opctx: &OpContext, dns_group: DnsGroup, ) -> LookupResult<DnsVersion> { - self.dns_group_latest_version_conn( - opctx, - &*self.pool_connection_authorized(opctx).await?, - dns_group, - ) - .await + let version = self + .dns_group_latest_version_conn( + opctx, + &*self.pool_connection_authorized(opctx).await?, + dns_group, + ) + .await?; + Ok(version) } pub async fn dns_group_latest_version_conn( @@ -119,7 +122,7 @@ impl DataStore { opctx: &OpContext, conn: &async_bb8_diesel::Connection<DbConnection>, dns_group: DnsGroup, - ) -> LookupResult<DnsVersion> { + ) -> Result<DnsVersion, TransactionError<Error>> { opctx.authorize(authz::Action::Read, &authz::DNS_CONFIG).await?; use db::schema::dns_version::dsl; let versions = dsl::dns_version .filter(dsl::dns_group.eq(dns_group)) .order_by(dsl::version.desc()) .limit(1) .select(DnsVersion::as_select()) .load_async(conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .await?; bail_unless!( versions.len() == 1, @@ -377,28 +379,17 @@ impl DataStore { opctx: &OpContext, conn: &async_bb8_diesel::Connection<DbConnection>, update: DnsVersionUpdateBuilder, - ) -> Result<(), Error> { + ) -> Result<(), TransactionError<Error>> { opctx.authorize(authz::Action::Modify, &authz::DNS_CONFIG).await?; let zones = self .dns_zones_list_all_on_connection(opctx, conn, update.dns_group) .await?; - let
result = conn - .transaction_async(|c| async move { - self.dns_update_internal(opctx, &c, update, zones) - .await - .map_err(TransactionError::CustomError) - }) - .await; - - match result { - Ok(()) => Ok(()), - Err(TransactionError::CustomError(e)) => Err(e), - Err(TransactionError::Database(e)) => { - Err(public_error_from_diesel(e, ErrorHandler::Server)) - } - } + conn.transaction_async(|c| async move { + self.dns_update_internal(opctx, &c, update, zones).await + }) + .await } // This must only be used inside a transaction. Otherwise, it may make @@ -409,7 +400,7 @@ impl DataStore { conn: &async_bb8_diesel::Connection<DbConnection>, update: DnsVersionUpdateBuilder, zones: Vec<DnsZone>, - ) -> Result<(), Error> { + ) -> Result<(), TransactionError<Error>> { // TODO-scalability TODO-performance This would be much better as a CTE // for all the usual reasons described in RFD 192. Using an interactive // transaction here means that either we wind up holding database locks @@ -455,10 +446,7 @@ impl DataStore { diesel::insert_into(dsl::dns_version) .values(new_version) .execute_async(conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; + .await?; } { @@ -480,8 +468,7 @@ impl DataStore { ) .set(dsl::version_removed.eq(new_version_num)) .execute_async(conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .await?; bail_unless!( nremoved == ntoremove, @@ -495,10 +482,7 @@ impl DataStore { let nadded = diesel::insert_into(dsl::dns_name) .values(new_names) .execute_async(conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; + .await?; bail_unless!( nadded == ntoadd, @@ -1684,6 +1668,10 @@ mod test { let conn = datastore.pool_connection_for_tests().await.unwrap(); let error = datastore.dns_update(&opctx, &conn, update).await.unwrap_err(); + let error = match error { + TransactionError::CustomError(err) => err, + _ => panic!("Unexpected error: {:?}", error), + }; assert_eq!( error.to_string(), "Internal Error: updated wrong number of dns_name \ @@ -1707,11 +1695,15 @@ mod test { update.add_name(String::from("n2"), records1.clone()).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - let error = - datastore.dns_update(&opctx, &conn, update).await.unwrap_err(); + let error = Error::from( + datastore.dns_update(&opctx, &conn, update).await.unwrap_err(), + ); let msg = error.to_string(); - assert!(msg.starts_with("Internal Error: ")); - assert!(msg.contains("violates unique constraint")); + assert!(msg.starts_with("Internal Error: "), "Message: {msg:}"); + assert!( + msg.contains("violates unique constraint"), + "Message: {msg:}" + ); } let dns_config = datastore diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index e663130a84..4e34bfc15c 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -10,7 +10,9 @@ use crate::authz::ApiResource; use crate::context::OpContext; use crate::db; use crate::db::error::public_error_from_diesel; +use crate::db::error::retryable; use crate::db::error::ErrorHandler; +use crate::db::error::TransactionError; use crate::db::lookup::LookupPath; use crate::db::model::ExternalIp; use crate::db::model::IncompleteExternalIp; @@ -132,7 +134,8 @@ impl DataStore { data: IncompleteExternalIp, ) -> CreateResult<ExternalIp> { let conn = self.pool_connection_authorized(opctx).await?; - Self::allocate_external_ip_on_connection(&conn, data).await + let ip =
Self::allocate_external_ip_on_connection(&conn, data).await?; + Ok(ip) } /// Variant of [Self::allocate_external_ip] which may be called from a @@ -140,23 +143,30 @@ impl DataStore { pub(crate) async fn allocate_external_ip_on_connection( conn: &async_bb8_diesel::Connection<DbConnection>, data: IncompleteExternalIp, - ) -> CreateResult<ExternalIp> { + ) -> Result<ExternalIp, TransactionError<Error>> { let explicit_ip = data.explicit_ip().is_some(); NextExternalIp::new(data).get_result_async(conn).await.map_err(|e| { use diesel::result::Error::NotFound; match e { NotFound => { if explicit_ip { - Error::invalid_request( + TransactionError::CustomError(Error::invalid_request( "Requested external IP address not available", - ) + )) } else { - Error::invalid_request( + TransactionError::CustomError(Error::invalid_request( "No external IP addresses available", - ) + )) + } + } + _ => { + if retryable(&e) { + return TransactionError::Database(e); } + TransactionError::CustomError( + crate::db::queries::external_ip::from_diesel(e), + ) } - _ => crate::db::queries::external_ip::from_diesel(e), } }) } diff --git a/nexus/db-queries/src/db/datastore/identity_provider.rs b/nexus/db-queries/src/db/datastore/identity_provider.rs index fdc9a020e7..cee577acd6 100644 --- a/nexus/db-queries/src/db/datastore/identity_provider.rs +++ b/nexus/db-queries/src/db/datastore/identity_provider.rs @@ -14,7 +14,6 @@ use crate::db::identity::Resource; use crate::db::model::IdentityProvider; use crate::db::model::Name; use crate::db::pagination::paginated; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -63,36 +62,47 @@ impl DataStore { assert_eq!(provider.silo_id, authz_idp_list.silo().id()); let name = provider.identity().name.to_string(); - self.pool_connection_authorized(opctx) - .await?
- .transaction_async(|conn| async move { - // insert silo identity provider record with type Saml - use db::schema::identity_provider::dsl as idp_dsl; - diesel::insert_into(idp_dsl::identity_provider) - .values(db::model::IdentityProvider { - identity: db::model::IdentityProviderIdentity { - id: provider.identity.id, - name: provider.identity.name.clone(), - description: provider.identity.description.clone(), - time_created: provider.identity.time_created, - time_modified: provider.identity.time_modified, - time_deleted: provider.identity.time_deleted, - }, - silo_id: provider.silo_id, - provider_type: db::model::IdentityProviderType::Saml, - }) - .execute_async(&conn) - .await?; + let conn = self.pool_connection_authorized(opctx).await?; - // insert silo saml identity provider record - use db::schema::saml_identity_provider::dsl; - let result = diesel::insert_into(dsl::saml_identity_provider) - .values(provider) - .returning(db::model::SamlIdentityProvider::as_returning()) - .get_result_async(&conn) - .await?; + self.transaction_retry_wrapper("saml_identity_provider_create") + .transaction(&conn, |conn| { + let provider = provider.clone(); + async move { + // insert silo identity provider record with type Saml + use db::schema::identity_provider::dsl as idp_dsl; + diesel::insert_into(idp_dsl::identity_provider) + .values(db::model::IdentityProvider { + identity: db::model::IdentityProviderIdentity { + id: provider.identity.id, + name: provider.identity.name.clone(), + description: provider + .identity + .description + .clone(), + time_created: provider.identity.time_created, + time_modified: provider.identity.time_modified, + time_deleted: provider.identity.time_deleted, + }, + silo_id: provider.silo_id, + provider_type: + db::model::IdentityProviderType::Saml, + }) + .execute_async(&conn) + .await?; - Ok(result) + // insert silo saml identity provider record + use db::schema::saml_identity_provider::dsl; + let result = + diesel::insert_into(dsl::saml_identity_provider) + .values(provider) + .returning( + db::model::SamlIdentityProvider::as_returning(), + ) + .get_result_async(&conn) + .await?; + + Ok(result) + } }) .await .map_err(|e| { diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 44cd7a95b7..2e7f9da5b7 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -148,6 +148,7 @@ pub type DataStoreConnection<'a> = pub struct DataStore { pool: Arc, virtual_provisioning_collection_producer: crate::provisioning::Producer, + transaction_retry_producer: crate::transaction_retry::Producer, } // The majority of `DataStore`'s methods live in our submodules as a concession @@ -164,6 +165,8 @@ impl DataStore { pool, virtual_provisioning_collection_producer: crate::provisioning::Producer::new(), + transaction_retry_producer: crate::transaction_retry::Producer::new( + ), }; Ok(datastore) } @@ -210,6 +213,29 @@ impl DataStore { self.virtual_provisioning_collection_producer.clone(), ) .unwrap(); + registry + .register_producer(self.transaction_retry_producer.clone()) + .unwrap(); + } + + /// Constructs a transaction retry helper + /// + /// Automatically wraps the underlying producer + pub fn transaction_retry_wrapper( + &self, + name: &'static str, + ) -> crate::transaction_retry::RetryHelper { + crate::transaction_retry::RetryHelper::new( + &self.transaction_retry_producer, + name, + ) + } + + #[cfg(test)] + pub(crate) fn transaction_retry_producer( + &self, + ) -> &crate::transaction_retry::Producer 
{ + &self.transaction_retry_producer } /// Returns a connection to a connection from the database connection pool. diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 06550e9439..4d4e43c9a7 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -13,7 +13,6 @@ use crate::db::collection_insert::DatastoreCollection; use crate::db::cte_utils::BoxedQuery; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::model::IncompleteNetworkInterface; use crate::db::model::Instance; use crate::db::model::InstanceNetworkInterface; @@ -25,7 +24,7 @@ use crate::db::model::VpcSubnet; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; use crate::db::queries::network_interface; -use async_bb8_diesel::AsyncConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -466,77 +465,91 @@ impl DataStore { InstanceNotStopped, FailedToUnsetPrimary(DieselError), } - type TxnError = TransactionError; + + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; if primary { - conn.transaction_async(|conn| async move { - let instance_runtime = - instance_query.get_result_async(&conn).await?.runtime_state; - if instance_runtime.propolis_id.is_some() - || instance_runtime.nexus_state != stopped - { - return Err(TxnError::CustomError( - NetworkInterfaceUpdateError::InstanceNotStopped, - )); - } + self.transaction_retry_wrapper("instance_update_network_interface") + .transaction(&conn, |conn| { + let err = err.clone(); + let stopped = stopped.clone(); + let update_target_query = update_target_query.clone(); + async move { + let instance_runtime = + instance_query.get_result_async(&conn).await?.runtime_state; + if instance_runtime.propolis_id.is_some() + || instance_runtime.nexus_state != stopped + { + return Err(err.bail(NetworkInterfaceUpdateError::InstanceNotStopped)); + } - // First, get the primary interface - let primary_interface = - find_primary_query.get_result_async(&conn).await?; - // If the target and primary are different, we need to toggle - // the primary into a secondary. - if primary_interface.identity.id != interface_id { - use crate::db::schema::network_interface::dsl; - if let Err(e) = diesel::update(dsl::network_interface) - .filter(dsl::id.eq(primary_interface.identity.id)) - .filter(dsl::kind.eq(NetworkInterfaceKind::Instance)) - .filter(dsl::time_deleted.is_null()) - .set(dsl::is_primary.eq(false)) - .execute_async(&conn) - .await - { - return Err(TxnError::CustomError( - NetworkInterfaceUpdateError::FailedToUnsetPrimary( - e, - ), - )); - } - } + // First, get the primary interface + let primary_interface = + find_primary_query.get_result_async(&conn).await?; + // If the target and primary are different, we need to toggle + // the primary into a secondary. 
+ if primary_interface.identity.id != interface_id { + use crate::db::schema::network_interface::dsl; + if let Err(e) = diesel::update(dsl::network_interface) + .filter(dsl::id.eq(primary_interface.identity.id)) + .filter(dsl::kind.eq(NetworkInterfaceKind::Instance)) + .filter(dsl::time_deleted.is_null()) + .set(dsl::is_primary.eq(false)) + .execute_async(&conn) + .await + { + return Err(err.bail_retryable_or_else( + e, + |e| NetworkInterfaceUpdateError::FailedToUnsetPrimary(e) + )); + } + } - // In any case, update the actual target - Ok(update_target_query.get_result_async(&conn).await?) - }) + // In any case, update the actual target + update_target_query.get_result_async(&conn).await + } + }).await } else { // In this case, we can just directly apply the updates. By // construction, `updates.primary` is `None`, so nothing will // be done there. The other columns always need to be updated, and // we're only hitting a single row. Note that we still need to // verify the instance is stopped. - conn.transaction_async(|conn| async move { - let instance_state = - instance_query.get_result_async(&conn).await?.runtime_state; - if instance_state.propolis_id.is_some() - || instance_state.nexus_state != stopped - { - return Err(TxnError::CustomError( - NetworkInterfaceUpdateError::InstanceNotStopped, - )); - } - Ok(update_target_query.get_result_async(&conn).await?) - }) + self.transaction_retry_wrapper("instance_update_network_interface") + .transaction(&conn, |conn| { + let err = err.clone(); + let stopped = stopped.clone(); + let update_target_query = update_target_query.clone(); + async move { + let instance_state = + instance_query.get_result_async(&conn).await?.runtime_state; + if instance_state.propolis_id.is_some() + || instance_state.nexus_state != stopped + { + return Err(err.bail(NetworkInterfaceUpdateError::InstanceNotStopped)); + } + update_target_query.get_result_async(&conn).await + } + }).await } - .await // Convert to `InstanceNetworkInterface` before returning, we know // this is valid as we've filtered appropriately above. 
.map(NetworkInterface::as_instance) - .map_err(|e| match e { - TxnError::CustomError( - NetworkInterfaceUpdateError::InstanceNotStopped, - ) => Error::invalid_request( - "Instance must be stopped to update its network interfaces", - ), - _ => Error::internal_error(&format!("Transaction error: {:?}", e)), + .map_err(|e| { + if let Some(err) = err.take() { + match err { + NetworkInterfaceUpdateError::InstanceNotStopped => { + return Error::invalid_request( + "Instance must be stopped to update its network interfaces", + ); + }, + NetworkInterfaceUpdateError::FailedToUnsetPrimary(err) => { + return public_error_from_diesel(err, ErrorHandler::Server); + }, + } + } + public_error_from_diesel(e, ErrorHandler::Server) }) } } diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs index c447b5bf98..ba0c64abfd 100644 --- a/nexus/db-queries/src/db/datastore/project.rs +++ b/nexus/db-queries/src/db/datastore/project.rs @@ -13,7 +13,6 @@ use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::fixed_data::project::SERVICES_PROJECT; use crate::db::fixed_data::silo::INTERNAL_SILO_ID; use crate::db::identity::Resource; @@ -24,7 +23,8 @@ use crate::db::model::ProjectUpdate; use crate::db::model::Silo; use crate::db::model::VirtualProvisioningCollection; use crate::db::pagination::paginated; -use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; +use crate::transaction_retry::OptionalError; +use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -151,49 +151,62 @@ impl DataStore { use db::schema::project::dsl; + let err = OptionalError::new(); let name = project.name().as_str().to_string(); + let conn = self.pool_connection_authorized(opctx).await?; + let db_project = self - .pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { - let project: Project = Silo::insert_resource( - silo_id, - diesel::insert_into(dsl::project).values(project), - ) - .insert_and_get_result_async(&conn) - .await - .map_err(|e| match e { - AsyncInsertError::CollectionNotFound => { - authz_silo_inner.not_found() - } - AsyncInsertError::DatabaseError(e) => { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::Project, - &name, + .transaction_retry_wrapper("project_create_in_silo") + .transaction(&conn, |conn| { + let err = err.clone(); + + let authz_silo_inner = authz_silo_inner.clone(); + let name = name.clone(); + let project = project.clone(); + async move { + let project: Project = Silo::insert_resource( + silo_id, + diesel::insert_into(dsl::project).values(project), + ) + .insert_and_get_result_async(&conn) + .await + .map_err(|e| match e { + AsyncInsertError::CollectionNotFound => { + err.bail(authz_silo_inner.not_found()) + } + AsyncInsertError::DatabaseError(diesel_error) => err + .bail_retryable_or_else( + diesel_error, + |diesel_error| { + public_error_from_diesel( + diesel_error, + ErrorHandler::Conflict( + ResourceType::Project, + &name, + ), + ) + }, ), - ) - } - })?; - - // Create resource provisioning for the project. 
- self.virtual_provisioning_collection_create_on_connection( - &conn, - VirtualProvisioningCollection::new( - project.id(), - CollectionTypeProvisioned::Project, - ), - ) - .await?; - Ok(project) + })?; + + // Create resource provisioning for the project. + self.virtual_provisioning_collection_create_on_connection( + &conn, + VirtualProvisioningCollection::new( + project.id(), + CollectionTypeProvisioned::Project, + ), + ) + .await?; + Ok(project) + } }) .await - .map_err(|e| match e { - TransactionError::CustomError(e) => e, - TransactionError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) + .map_err(|e| { + if let Some(err) = err.take() { + return err; } + public_error_from_diesel(e, ErrorHandler::Server) })?; Ok(( @@ -230,47 +243,56 @@ impl DataStore { use db::schema::project::dsl; - type TxnError = TransactionError; - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { - let now = Utc::now(); - let updated_rows = diesel::update(dsl::project) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::id.eq(authz_project.id())) - .filter(dsl::rcgen.eq(db_project.rcgen)) - .set(dsl::time_deleted.eq(now)) - .returning(Project::as_returning()) - .execute_async(&conn) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_project), - ) - })?; + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper("project_delete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let now = Utc::now(); + let updated_rows = diesel::update(dsl::project) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(authz_project.id())) + .filter(dsl::rcgen.eq(db_project.rcgen)) + .set(dsl::time_deleted.eq(now)) + .returning(Project::as_returning()) + .execute_async(&conn) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource( + authz_project, + ), + ) + }) + })?; + + if updated_rows == 0 { + return Err(err.bail(Error::InvalidRequest { + message: + "deletion failed due to concurrent modification" + .to_string(), + })); + } - if updated_rows == 0 { - return Err(TxnError::CustomError(Error::InvalidRequest { - message: - "deletion failed due to concurrent modification" - .to_string(), - })); + self.virtual_provisioning_collection_delete_on_connection( + &opctx.log, + &conn, + db_project.id(), + ) + .await?; + Ok(()) } - - self.virtual_provisioning_collection_delete_on_connection( - &conn, - db_project.id(), - ) - .await?; - Ok(()) }) .await - .map_err(|e| match e { - TxnError::CustomError(e) => e, - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) + .map_err(|e| { + if let Some(err) = err.take() { + return err; } + public_error_from_diesel(e, ErrorHandler::Server) })?; Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index e11377f11a..a69386cfd0 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -13,8 +13,9 @@ use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; +use crate::db::error::retryable; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; +use crate::db::error::MaybeRetryable::*; use crate::db::fixed_data::silo::INTERNAL_SILO_ID; use 
crate::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; @@ -26,6 +27,7 @@ use crate::db::model::Rack; use crate::db::model::Zpool; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; @@ -58,6 +60,7 @@ use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use omicron_common::bail_unless; use std::net::IpAddr; +use std::sync::{Arc, OnceLock}; use uuid::Uuid; /// Groups arguments related to rack initialization @@ -88,18 +91,30 @@ enum RackInitError { DnsSerialization(Error), Silo(Error), RoleAssignment(Error), + // Retryable database error + Retryable(DieselError), + // Other non-retryable database error + Database(DieselError), } -type TxnError = TransactionError<RackInitError>; -impl From<TxnError> for Error { - fn from(e: TxnError) -> Self { +// Catch-all for Diesel error conversion into RackInitError, which +// can also label errors as retryable. +impl From<DieselError> for RackInitError { + fn from(e: DieselError) -> Self { + if retryable(&e) { + Self::Retryable(e) + } else { + Self::Database(e) + } + } +} + +impl From<RackInitError> for Error { + fn from(e: RackInitError) -> Self { match e { - TxnError::CustomError(RackInitError::AddingIp(err)) => err, - TxnError::CustomError(RackInitError::AddingNic(err)) => err, - TxnError::CustomError(RackInitError::DatasetInsert { - err, - zpool_id, - }) => match err { + RackInitError::AddingIp(err) => err, + RackInitError::AddingNic(err) => err, + RackInitError::DatasetInsert { err, zpool_id } => match err { AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { type_name: ResourceType::Zpool, lookup_type: LookupType::ById(zpool_id), }, @@ -108,43 +123,36 @@ impl From<TxnError> for Error { public_error_from_diesel(e, ErrorHandler::Server) } }, - TxnError::CustomError(RackInitError::ServiceInsert(err)) => { - Error::internal_error(&format!( - "failed to insert Service record: {:#}", - err - )) - } - TxnError::CustomError(RackInitError::RackUpdate { - err, - rack_id, - }) => public_error_from_diesel( - err, - ErrorHandler::NotFoundByLookup( - ResourceType::Rack, - LookupType::ById(rack_id), - ), + RackInitError::ServiceInsert(err) => Error::internal_error( + &format!("failed to insert Service record: {:#}", err), ), - TxnError::CustomError(RackInitError::DnsSerialization(err)) => { - Error::internal_error(&format!( - "failed to serialize initial DNS records: {:#}", - err - )) - } - TxnError::CustomError(RackInitError::Silo(err)) => { - Error::internal_error(&format!( - "failed to create recovery Silo: {:#}", - err - )) - } - TxnError::CustomError(RackInitError::RoleAssignment(err)) => { - Error::internal_error(&format!( - "failed to assign role to initial user: {:#}", - err - )) - } - TxnError::Database(e) => { - Error::internal_error(&format!("Transaction error: {}", e)) + RackInitError::RackUpdate { err, rack_id } => { + public_error_from_diesel( + err, + ErrorHandler::NotFoundByLookup( + ResourceType::Rack, + LookupType::ById(rack_id), + ), + ) } + RackInitError::DnsSerialization(err) => Error::internal_error( + &format!("failed to serialize initial DNS records: {:#}", err), ), + RackInitError::Silo(err) => Error::internal_error(&format!( + "failed to create recovery Silo: {:#}", + err + )), + RackInitError::RoleAssignment(err) => Error::internal_error( + &format!("failed to assign role to initial user: {:#}", err), + ), +
RackInitError::Retryable(err) => Error::internal_error(&format!( + "failed operation due to database contention: {:#}", + err + )), + RackInitError::Database(err) => Error::internal_error(&format!( + "failed operation due to database error: {:#}", + err + )), } } } @@ -336,9 +344,6 @@ impl DataStore { Ok(()) } - // The following methods which return a `TxnError` take a `conn` parameter - // which comes from the transaction created in `rack_set_initialized`. - #[allow(clippy::too_many_arguments)] async fn rack_create_recovery_silo( &self, @@ -350,7 +355,7 @@ impl DataStore { recovery_user_id: external_params::UserId, recovery_user_password_hash: omicron_passwords::PasswordHashString, dns_update: DnsVersionUpdateBuilder, - ) -> Result<(), TxnError> { + ) -> Result<(), RackInitError> { let db_silo = self .silo_create_conn( conn, @@ -361,8 +366,10 @@ impl DataStore { dns_update, ) .await - .map_err(RackInitError::Silo) - .map_err(TxnError::CustomError)?; + .map_err(|err| match err.retryable() { + NotRetryable(err) => RackInitError::Silo(err.into()), + Retryable(err) => RackInitError::Retryable(err), + })?; info!(log, "Created recovery silo"); // Create the first user in the initial Recovery Silo @@ -416,8 +423,7 @@ impl DataStore { }], ) .await - .map_err(RackInitError::RoleAssignment) - .map_err(TxnError::CustomError)?; + .map_err(RackInitError::RoleAssignment)?; debug!(log, "Generated role assignment queries"); q1.execute_async(conn).await?; @@ -433,7 +439,7 @@ impl DataStore { log: &slog::Logger, service_pool: &db::model::IpPool, service: internal_params::ServicePutRequest, - ) -> Result<(), TxnError> { + ) -> Result<(), RackInitError> { use internal_params::ServiceKind; let service_db = db::model::Service::new( @@ -443,9 +449,12 @@ impl DataStore { service.address, service.kind.clone().into(), ); - self.service_upsert_conn(conn, service_db).await.map_err(|e| { - TxnError::CustomError(RackInitError::ServiceInsert(e)) - })?; + self.service_upsert_conn(conn, service_db).await.map_err( + |e| match e.retryable() { + Retryable(e) => RackInitError::Retryable(e), + NotRetryable(e) => RackInitError::ServiceInsert(e.into()), + }, + )?; // For services with external connectivity, we record their // explicit IP allocation and create a service NIC as well. 
@@ -476,9 +485,7 @@ impl DataStore { Some(nic.ip), Some(nic.mac), ) - .map_err(|e| { - TxnError::CustomError(RackInitError::AddingNic(e)) - })?; + .map_err(|e| RackInitError::AddingNic(e))?; Some((db_ip, db_nic)) } ServiceKind::BoundaryNtp { snat, ref nic } => { @@ -500,9 +507,7 @@ impl DataStore { Some(nic.ip), Some(nic.mac), ) - .map_err(|e| { - TxnError::CustomError(RackInitError::AddingNic(e)) - })?; + .map_err(|e| RackInitError::AddingNic(e))?; Some((db_ip, db_nic)) } _ => None, @@ -517,7 +522,10 @@ impl DataStore { IP address for {}", service.kind, ); - TxnError::CustomError(RackInitError::AddingIp(err)) + match err.retryable() { + Retryable(e) => RackInitError::Retryable(e), + NotRetryable(e) => RackInitError::AddingIp(e.into()), + } })?; self.create_network_interface_raw_conn(conn, db_nic) @@ -530,9 +538,10 @@ impl DataStore { _, db::model::NetworkInterfaceKind::Service, ) => Ok(()), - _ => Err(TxnError::CustomError( - RackInitError::AddingNic(e.into_external()), - )), + InsertError::Retryable(err) => { + Err(RackInitError::Retryable(err)) + } + _ => Err(RackInitError::AddingNic(e.into_external())), } })?; } @@ -551,146 +560,187 @@ impl DataStore { opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - let rack_id = rack_init.rack_id; - let services = rack_init.services; - let datasets = rack_init.datasets; - let service_ip_pool_ranges = rack_init.service_ip_pool_ranges; - let internal_dns = rack_init.internal_dns; - let external_dns = rack_init.external_dns; - let (authz_service_pool, service_pool) = self.ip_pools_service_lookup(&opctx).await?; // NOTE: This operation could likely be optimized with a CTE, but given // the low-frequency of calls, this optimization has been deferred. let log = opctx.log.clone(); + let err = Arc::new(OnceLock::new()); + + // NOTE: This transaction cannot yet be made retryable, as it uses + // nested transactions. let rack = self .pool_connection_authorized(opctx) .await? - .transaction_async(|conn| async move { - // Early exit if the rack has already been initialized. - let rack = rack_dsl::rack - .filter(rack_dsl::id.eq(rack_id)) - .select(Rack::as_select()) - .get_result_async(&conn) - .await - .map_err(|e| { - warn!(log, "Initializing Rack: Rack UUID not found"); - TxnError::CustomError(RackInitError::RackUpdate { - err: e, - rack_id, - }) - })?; - if rack.initialized { - info!(log, "Early exit: Rack already initialized"); - return Ok(rack); - } + .transaction_async(|conn| { + let err = err.clone(); + let log = log.clone(); + let authz_service_pool = authz_service_pool.clone(); + let rack_init = rack_init.clone(); + let service_pool = service_pool.clone(); + async move { + let rack_id = rack_init.rack_id; + let services = rack_init.services; + let datasets = rack_init.datasets; + let service_ip_pool_ranges = rack_init.service_ip_pool_ranges; + let internal_dns = rack_init.internal_dns; + let external_dns = rack_init.external_dns; + + // Early exit if the rack has already been initialized. + let rack = rack_dsl::rack + .filter(rack_dsl::id.eq(rack_id)) + .select(Rack::as_select()) + .get_result_async(&conn) + .await + .map_err(|e| { + warn!(log, "Initializing Rack: Rack UUID not found"); + err.set(RackInitError::RackUpdate { + err: e, + rack_id, + }).unwrap(); + DieselError::RollbackTransaction + })?; + if rack.initialized { + info!(log, "Early exit: Rack already initialized"); + return Ok::<_, DieselError>(rack); + } - // Otherwise, insert services and datasets. + // Otherwise, insert services and datasets. 
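[Note on the `Arc<OnceLock>` idiom introduced above: `transaction_async` requires the closure to fail with a plain `DieselError`, and, as the NOTE says, this transaction cannot use the retry wrapper yet because it nests transactions. The rewrite therefore stashes the real failure in an `Arc<OnceLock<..>>` and aborts with `DieselError::RollbackTransaction`. A minimal, self-contained sketch of the pattern, with `AppError` standing in for `RackInitError`:]

    use diesel::result::Error as DieselError;
    use std::sync::{Arc, OnceLock};

    #[derive(Debug)]
    struct AppError(&'static str); // stand-in for RackInitError

    fn example() -> Result<(), AppError> {
        let err: Arc<OnceLock<AppError>> = Arc::new(OnceLock::new());
        // Inside the transaction closure: stash the real error, roll back.
        let txn_result: Result<(), DieselError> = {
            let err = err.clone();
            err.set(AppError("rack update failed")).unwrap();
            Err(DieselError::RollbackTransaction)
        };
        // After the transaction: prefer the stashed error over the
        // generic Diesel rollback error.
        txn_result.map_err(|_| {
            Arc::try_unwrap(err)
                .expect("transaction closure dropped its clone")
                .take()
                .unwrap_or(AppError("transaction error"))
        })
    }
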
-                // Set up the IP pool for internal services.
-                for range in service_ip_pool_ranges {
-                    Self::ip_pool_add_range_on_connection(
-                        &conn,
-                        opctx,
-                        &authz_service_pool,
-                        &range,
-                    )
-                    .await
-                    .map_err(|err| {
-                        warn!(
-                            log,
-                            "Initializing Rack: Failed to add IP pool range"
-                        );
-                        TxnError::CustomError(RackInitError::AddingIp(err))
-                    })?;
-                }
+                    // Set up the IP pool for internal services.
+                    for range in service_ip_pool_ranges {
+                        Self::ip_pool_add_range_on_connection(
+                            &conn,
+                            opctx,
+                            &authz_service_pool,
+                            &range,
+                        )
+                        .await
+                        .map_err(|e| {
+                            warn!(
+                                log,
+                                "Initializing Rack: Failed to add IP pool range"
+                            );
+                            err.set(RackInitError::AddingIp(e)).unwrap();
+                            DieselError::RollbackTransaction
+                        })?;
+                    }
+
+                    // Allocate records for all services.
+                    for service in services {
+                        self.rack_populate_service_records(
+                            &conn,
+                            &log,
+                            &service_pool,
+                            service,
+                        )
+                        .await
+                        .map_err(|e| {
+                            err.set(e).unwrap();
+                            DieselError::RollbackTransaction
+                        })?;
+                    }
+                    info!(log, "Inserted services");
+
+                    for dataset in datasets {
+                        use db::schema::dataset::dsl;
+                        let zpool_id = dataset.pool_id;
+                        <Zpool as DatastoreCollection<Dataset>>::insert_resource(
+                            zpool_id,
+                            diesel::insert_into(dsl::dataset)
+                                .values(dataset.clone())
+                                .on_conflict(dsl::id)
+                                .do_update()
+                                .set((
+                                    dsl::time_modified.eq(Utc::now()),
+                                    dsl::pool_id.eq(excluded(dsl::pool_id)),
+                                    dsl::ip.eq(excluded(dsl::ip)),
+                                    dsl::port.eq(excluded(dsl::port)),
+                                    dsl::kind.eq(excluded(dsl::kind)),
+                                )),
+                        )
+                        .insert_and_get_result_async(&conn)
+                        .await
+                        .map_err(|e| {
+                            err.set(RackInitError::DatasetInsert {
+                                err: e,
+                                zpool_id,
+                            }).unwrap();
+                            DieselError::RollbackTransaction
+                        })?;
+                    }
+                    info!(log, "Inserted datasets");
+
+                    // Insert the initial contents of the internal and external DNS
+                    // zones.
+                    Self::load_dns_data(&conn, internal_dns)
+                        .await
+                        .map_err(|e| {
+                            err.set(RackInitError::DnsSerialization(e)).unwrap();
+                            DieselError::RollbackTransaction
+                        })?;
+                    info!(log, "Populated DNS tables for internal DNS");
+
+                    Self::load_dns_data(&conn, external_dns)
+                        .await
+                        .map_err(|e| {
+                            err.set(RackInitError::DnsSerialization(e)).unwrap();
+                            DieselError::RollbackTransaction
+                        })?;
+                    info!(log, "Populated DNS tables for external DNS");
+
+                    // Create the initial Recovery Silo
+                    self.rack_create_recovery_silo(
+                        &opctx,
+                        &conn,
+                        &log,
+                        rack_init.recovery_silo,
+                        rack_init.recovery_silo_fq_dns_name,
+                        rack_init.recovery_user_id,
+                        rack_init.recovery_user_password_hash,
+                        rack_init.dns_update,
+                    )
+                    .await
+                    .map_err(|e| match e {
+                        RackInitError::Retryable(e) => e,
+                        _ => {
+                            err.set(e).unwrap();
+                            DieselError::RollbackTransaction
+                        },
+                    })?;
 
-                // Allocate records for all services.
-                for service in services {
-                    self.rack_populate_service_records(
-                        &conn,
-                        &log,
-                        &service_pool,
-                        service,
-                    )
-                    .await?;
-                }
-                info!(log, "Inserted services");
-
-                for dataset in datasets {
-                    use db::schema::dataset::dsl;
-                    let zpool_id = dataset.pool_id;
-                    <Zpool as DatastoreCollection<Dataset>>::insert_resource(
-                        zpool_id,
-                        diesel::insert_into(dsl::dataset)
-                            .values(dataset.clone())
-                            .on_conflict(dsl::id)
-                            .do_update()
-                            .set((
-                                dsl::time_modified.eq(Utc::now()),
-                                dsl::pool_id.eq(excluded(dsl::pool_id)),
-                                dsl::ip.eq(excluded(dsl::ip)),
-                                dsl::port.eq(excluded(dsl::port)),
-                                dsl::kind.eq(excluded(dsl::kind)),
-                            )),
-                    )
-                    .insert_and_get_result_async(&conn)
-                    .await
-                    .map_err(|err| {
-                        TxnError::CustomError(RackInitError::DatasetInsert {
-                            err,
-                            zpool_id,
-                        })
-                    })?;
-                }
-                info!(log, "Inserted datasets");
-
-                // Insert the initial contents of the internal and external DNS
-                // zones.
-                Self::load_dns_data(&conn, internal_dns)
-                    .await
-                    .map_err(RackInitError::DnsSerialization)
-                    .map_err(TxnError::CustomError)?;
-                info!(log, "Populated DNS tables for internal DNS");
 
-                Self::load_dns_data(&conn, external_dns)
-                    .await
-                    .map_err(RackInitError::DnsSerialization)
-                    .map_err(TxnError::CustomError)?;
-                info!(log, "Populated DNS tables for external DNS");
-
-                // Create the initial Recovery Silo
-                self.rack_create_recovery_silo(
-                    &opctx,
-                    &conn,
-                    &log,
-                    rack_init.recovery_silo,
-                    rack_init.recovery_silo_fq_dns_name,
-                    rack_init.recovery_user_id,
-                    rack_init.recovery_user_password_hash,
-                    rack_init.dns_update,
-                )
-                .await?;
-
-                let rack = diesel::update(rack_dsl::rack)
-                    .filter(rack_dsl::id.eq(rack_id))
-                    .set((
-                        rack_dsl::initialized.eq(true),
-                        rack_dsl::time_modified.eq(Utc::now()),
-                    ))
-                    .returning(Rack::as_returning())
-                    .get_result_async::<Rack>(&conn)
-                    .await
-                    .map_err(|err| {
-                        TxnError::CustomError(RackInitError::RackUpdate {
-                            err,
-                            rack_id,
-                        })
-                    })?;
-                Ok::<_, TxnError>(rack)
-            })
-            .await?;
+                    let rack = diesel::update(rack_dsl::rack)
+                        .filter(rack_dsl::id.eq(rack_id))
+                        .set((
+                            rack_dsl::initialized.eq(true),
+                            rack_dsl::time_modified.eq(Utc::now()),
+                        ))
+                        .returning(Rack::as_returning())
+                        .get_result_async::<Rack>(&conn)
+                        .await
+                        .map_err(|e| {
+                            if retryable(&e) {
+                                return e;
+                            }
+                            err.set(RackInitError::RackUpdate {
+                                err: e,
+                                rack_id,
+                            }).unwrap();
+                            DieselError::RollbackTransaction
+                        })?;
+                    Ok(rack)
+                }
+            },
+            )
+            .await
+            .map_err(|e| {
+                if let Some(err) = Arc::try_unwrap(err).unwrap().take() {
+                    err.into()
+                } else {
+                    Error::internal_error(&format!("Transaction error: {}", e))
+                }
+            })?;
         Ok(rack)
     }
@@ -745,42 +795,54 @@ impl DataStore {
         use crate::db::schema::external_ip::dsl as extip_dsl;
         use crate::db::schema::service::dsl as service_dsl;
 
-        type TxnError = TransactionError<Error>;
-
-        self.pool_connection_authorized(opctx)
-            .await?
-            .transaction_async(|conn| async move {
-                let ips = extip_dsl::external_ip
-                    .inner_join(
-                        service_dsl::service.on(service_dsl::id
-                            .eq(extip_dsl::parent_id.assume_not_null())),
-                    )
-                    .filter(extip_dsl::parent_id.is_not_null())
-                    .filter(extip_dsl::time_deleted.is_null())
-                    .filter(extip_dsl::is_service)
-                    .filter(service_dsl::kind.eq(db::model::ServiceKind::Nexus))
-                    .select(ExternalIp::as_select())
-                    .get_results_async(&conn)
-                    .await?
-                    .into_iter()
-                    .map(|external_ip| external_ip.ip.ip())
-                    .collect();
-
-                let dns_zones = self
-                    .dns_zones_list_all_on_connection(
-                        opctx,
-                        &conn,
-                        DnsGroup::External,
-                    )
-                    .await?;
-                Ok((ips, dns_zones))
+        let err = OptionalError::new();
+        let conn = self.pool_connection_authorized(opctx).await?;
+        self.transaction_retry_wrapper("nexus_external_addresses")
+            .transaction(&conn, |conn| {
+                let err = err.clone();
+                async move {
+                    let ips = extip_dsl::external_ip
+                        .inner_join(
+                            service_dsl::service.on(service_dsl::id
+                                .eq(extip_dsl::parent_id.assume_not_null())),
+                        )
+                        .filter(extip_dsl::parent_id.is_not_null())
+                        .filter(extip_dsl::time_deleted.is_null())
+                        .filter(extip_dsl::is_service)
+                        .filter(
+                            service_dsl::kind.eq(db::model::ServiceKind::Nexus),
+                        )
+                        .select(ExternalIp::as_select())
+                        .get_results_async(&conn)
+                        .await?
+                        .into_iter()
+                        .map(|external_ip| external_ip.ip.ip())
+                        .collect();
+
+                    let dns_zones = self
+                        .dns_zones_list_all_on_connection(
+                            opctx,
+                            &conn,
+                            DnsGroup::External,
+                        )
+                        .await
+                        .map_err(|e| match e.retryable() {
+                            NotRetryable(not_retryable_err) => {
+                                err.bail(not_retryable_err)
+                            }
+                            Retryable(retryable_err) => retryable_err,
+                        })?;
+
+                    Ok((ips, dns_zones))
+                }
             })
             .await
-            .map_err(|error: TxnError| match error {
-                TransactionError::CustomError(err) => err,
-                TransactionError::Database(e) => {
-                    public_error_from_diesel(e, ErrorHandler::Server)
+            .map_err(|e| {
+                if let Some(err) = err.take() {
+                    return err.into();
                 }
+                public_error_from_diesel(e, ErrorHandler::Server)
             })
     }
 }
@@ -1014,14 +1076,16 @@ mod test {
             async fn [<get_all_ $table>](db: &DataStore) -> Vec<$model> {
                 use crate::db::schema::$table::dsl;
                 use nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL;
-                db.pool_connection_for_tests()
+                let conn = db.pool_connection_for_tests()
                     .await
-                    .unwrap()
-                    .transaction_async(|conn| async move {
+                    .unwrap();
+
+                db.transaction_retry_wrapper(concat!("fn_to_get_all_", stringify!($table)))
+                    .transaction(&conn, |conn| async move {
                         conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL)
                             .await
                            .unwrap();
-                        Ok::<_, crate::db::TransactionError<()>>(
+                        Ok(
                             dsl::$table
                                 .select($model::as_select())
                                 .get_results_async(&conn)
diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs
index 9465fe2792..b055a3e85c 100644
--- a/nexus/db-queries/src/db/datastore/region.rs
+++ b/nexus/db-queries/src/db/datastore/region.rs
@@ -10,18 +10,16 @@ use crate::context::OpContext;
 use crate::db;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
-use crate::db::error::TransactionError;
 use crate::db::lookup::LookupPath;
 use crate::db::model::Dataset;
 use crate::db::model::Region;
-use async_bb8_diesel::AsyncConnection;
+use crate::transaction_retry::OptionalError;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::prelude::*;
 use nexus_types::external_api::params;
 use omicron_common::api::external;
 use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::Error;
-use omicron_common::backoff::{self, BackoffError};
 use omicron_common::nexus_config::RegionAllocationStrategy;
 use slog::Logger;
 use uuid::Uuid;
@@ -152,7 +150,7 @@ impl DataStore {
     /// Also updates the storage usage on their corresponding datasets.
     pub async fn regions_hard_delete(
         &self,
-        log: &Logger,
+        _log: &Logger,
         region_ids: Vec<Uuid>,
     ) -> DeleteResult {
         if region_ids.is_empty() {
@@ -164,98 +162,79 @@ impl DataStore {
             #[error("Numeric error: {0}")]
             NumericError(String),
         }
-        type TxnError = TransactionError<RegionDeleteError>;
-
-        // Retry this transaction until it succeeds. It's a little heavy in that
-        // there's a for loop inside that iterates over the datasets the
-        // argument regions belong to, and it often encounters the "retry
-        // transaction" error.
-        let transaction = {
-            |region_ids: Vec<Uuid>| async {
-                self.pool_connection_unauthorized()
-                    .await?
-                    .transaction_async(|conn| async move {
-                        use db::schema::dataset::dsl as dataset_dsl;
-                        use db::schema::region::dsl as region_dsl;
-
-                        // Remove the regions, collecting datasets they're from.
-                        let datasets = diesel::delete(region_dsl::region)
-                            .filter(region_dsl::id.eq_any(region_ids))
-                            .returning(region_dsl::dataset_id)
-                            .get_results_async::<Uuid>(&conn).await?;
-
-                        // Update datasets to which the regions belonged.
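[Note on the pattern this hunk establishes: `nexus_external_addresses` above is the template the rest of this series follows. `transaction_retry_wrapper` re-runs the closure on retryable errors, while `OptionalError` carries a non-retryable domain error across the rollback. A schematic of the calling convention, inferred from these call sites; `MyError` and `do_work` are placeholders and the surrounding datastore types are assumed, so this is a shape sketch rather than runnable code:]

    let err = OptionalError::new();
    let conn = self.pool_connection_authorized(opctx).await?;
    self.transaction_retry_wrapper("example_operation")
        .transaction(&conn, |conn| {
            // Clone captured state: the closure may run once per retry.
            let err = err.clone();
            async move {
                if let Err(e) = do_work(&conn).await {
                    // `bail` records the domain error and yields a
                    // DieselError that rolls the transaction back.
                    return Err(err.bail(MyError::from(e)));
                }
                Ok(())
            }
        })
        .await
        .map_err(|e| {
            // A recorded domain error wins; anything else is a real
            // database error.
            if let Some(my_err) = err.take() {
                return my_err.into();
            }
            public_error_from_diesel(e, ErrorHandler::Server)
        })
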
-                        for dataset in datasets {
-                            let dataset_total_occupied_size: Option<
-                                diesel::pg::data_types::PgNumeric,
-                            > = region_dsl::region
-                                .filter(region_dsl::dataset_id.eq(dataset))
-                                .select(diesel::dsl::sum(
-                                    region_dsl::block_size
-                                        * region_dsl::blocks_per_extent
-                                        * region_dsl::extent_count,
-                                ))
-                                .nullable()
-                                .get_result_async(&conn).await?;
-
-                            let dataset_total_occupied_size: i64 = if let Some(
-                                dataset_total_occupied_size,
-                            ) =
-                                dataset_total_occupied_size
-                            {
-                                let dataset_total_occupied_size: db::model::ByteCount =
-                                    dataset_total_occupied_size.try_into().map_err(
-                                        |e: anyhow::Error| {
-                                            TxnError::CustomError(
-                                                RegionDeleteError::NumericError(
-                                                    e.to_string(),
-                                                ),
-                                            )
-                                        },
-                                    )?;
-
-                                dataset_total_occupied_size.into()
-                            } else {
-                                0
-                            };
-
-                            diesel::update(dataset_dsl::dataset)
-                                .filter(dataset_dsl::id.eq(dataset))
-                                .set(
-                                    dataset_dsl::size_used
-                                        .eq(dataset_total_occupied_size),
-                                )
-                                .execute_async(&conn).await?;
-                        }
-
-                        Ok(())
-                    })
-                    .await
-                    .map_err(|e: TxnError| {
-                        if e.retry_transaction() {
-                            BackoffError::transient(Error::internal_error(
-                                &format!("Retryable transaction error {:?}", e)
-                            ))
-                        } else {
-                            BackoffError::Permanent(Error::internal_error(
-                                &format!("Transaction error: {}", e)
-                            ))
-                        }
-                    })
-            }
-        };
-
-        backoff::retry_notify(
-            backoff::retry_policy_internal_service_aggressive(),
-            || async {
-                let region_ids = region_ids.clone();
-                transaction(region_ids).await
-            },
-            |e: Error, delay| {
-                info!(log, "{:?}, trying again in {:?}", e, delay,);
-            },
-        )
-        .await
+        let err = OptionalError::new();
+        let conn = self.pool_connection_unauthorized().await?;
+        self.transaction_retry_wrapper("regions_hard_delete")
+            .transaction(&conn, |conn| {
+                let err = err.clone();
+                let region_ids = region_ids.clone();
+                async move {
+                    use db::schema::dataset::dsl as dataset_dsl;
+                    use db::schema::region::dsl as region_dsl;
+
+                    // Remove the regions, collecting datasets they're from.
+                    let datasets = diesel::delete(region_dsl::region)
+                        .filter(region_dsl::id.eq_any(region_ids))
+                        .returning(region_dsl::dataset_id)
+                        .get_results_async::<Uuid>(&conn).await?;
+
+                    // Update datasets to which the regions belonged.
+                    for dataset in datasets {
+                        let dataset_total_occupied_size: Option<
+                            diesel::pg::data_types::PgNumeric,
+                        > = region_dsl::region
+                            .filter(region_dsl::dataset_id.eq(dataset))
+                            .select(diesel::dsl::sum(
+                                region_dsl::block_size
+                                    * region_dsl::blocks_per_extent
+                                    * region_dsl::extent_count,
+                            ))
+                            .nullable()
+                            .get_result_async(&conn).await?;
+
+                        let dataset_total_occupied_size: i64 = if let Some(
+                            dataset_total_occupied_size,
+                        ) =
+                            dataset_total_occupied_size
+                        {
+                            let dataset_total_occupied_size: db::model::ByteCount =
+                                dataset_total_occupied_size.try_into().map_err(
+                                    |e: anyhow::Error| {
+                                        err.bail(RegionDeleteError::NumericError(
+                                            e.to_string(),
+                                        ))
+                                    },
+                                )?;
+
+                            dataset_total_occupied_size.into()
+                        } else {
+                            0
+                        };
+
+                        diesel::update(dataset_dsl::dataset)
+                            .filter(dataset_dsl::id.eq(dataset))
+                            .set(
+                                dataset_dsl::size_used
+                                    .eq(dataset_total_occupied_size),
+                            )
+                            .execute_async(&conn).await?;
+                    }
+                    Ok(())
+                }
+            })
+            .await
+            .map_err(|e| {
+                if let Some(err) = err.take() {
+                    match err {
+                        RegionDeleteError::NumericError(err) => {
+                            return Error::internal_error(
+                                &format!("Transaction error: {}", err)
+                            );
+                        }
+                    }
+                }
+                public_error_from_diesel(e, ErrorHandler::Server)
+            })
     }
 
     /// Return the total occupied size for a dataset
diff --git a/nexus/db-queries/src/db/datastore/service.rs b/nexus/db-queries/src/db/datastore/service.rs
index 40bf250abe..df7ed27a6d 100644
--- a/nexus/db-queries/src/db/datastore/service.rs
+++ b/nexus/db-queries/src/db/datastore/service.rs
@@ -11,7 +11,9 @@ use crate::db;
 use crate::db::collection_insert::AsyncInsertError;
 use crate::db::collection_insert::DatastoreCollection;
 use crate::db::error::public_error_from_diesel;
+use crate::db::error::retryable;
 use crate::db::error::ErrorHandler;
+use crate::db::error::TransactionError;
 use crate::db::identity::Asset;
 use crate::db::model::Service;
 use crate::db::model::Sled;
@@ -38,7 +40,12 @@ impl DataStore {
         service: Service,
     ) -> CreateResult<Service> {
         let conn = self.pool_connection_authorized(opctx).await?;
-        self.service_upsert_conn(&conn, service).await
+        self.service_upsert_conn(&conn, service).await.map_err(|e| match e {
+            TransactionError::CustomError(err) => err,
+            TransactionError::Database(err) => {
+                public_error_from_diesel(err, ErrorHandler::Server)
+            }
+        })
     }
 
     /// Stores a new service in the database (using an existing db connection).
@@ -46,7 +53,7 @@ impl DataStore {
         &self,
         conn: &async_bb8_diesel::Connection<DbConnection>,
         service: Service,
-    ) -> CreateResult<Service> {
+    ) -> Result<Service, TransactionError<Error>> {
         use db::schema::service::dsl;
 
         let service_id = service.id();
@@ -68,17 +75,24 @@ impl DataStore {
             .insert_and_get_result_async(conn)
             .await
             .map_err(|e| match e {
-                AsyncInsertError::CollectionNotFound => Error::ObjectNotFound {
-                    type_name: ResourceType::Sled,
-                    lookup_type: LookupType::ById(sled_id),
-                },
-                AsyncInsertError::DatabaseError(e) => public_error_from_diesel(
-                    e,
-                    ErrorHandler::Conflict(
-                        ResourceType::Service,
-                        &service_id.to_string(),
-                    ),
-                ),
+                AsyncInsertError::CollectionNotFound => {
+                    TransactionError::CustomError(Error::ObjectNotFound {
+                        type_name: ResourceType::Sled,
+                        lookup_type: LookupType::ById(sled_id),
+                    })
+                }
+                AsyncInsertError::DatabaseError(e) => {
+                    if retryable(&e) {
+                        return TransactionError::Database(e);
+                    }
+                    TransactionError::CustomError(public_error_from_diesel(
+                        e,
+                        ErrorHandler::Conflict(
+                            ResourceType::Service,
+                            &service_id.to_string(),
+                        ),
+                    ))
+                }
             })
     }
 
diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs
index ec3658c067..ab48ec458f 100644
--- a/nexus/db-queries/src/db/datastore/silo.rs
+++ b/nexus/db-queries/src/db/datastore/silo.rs
@@ -11,6 +11,7 @@ use crate::context::OpContext;
 use crate::db;
 use crate::db::datastore::RunnableQuery;
 use crate::db::error::public_error_from_diesel;
+use crate::db::error::retryable;
 use crate::db::error::ErrorHandler;
 use crate::db::error::TransactionError;
 use crate::db::fixed_data::silo::{DEFAULT_SILO, INTERNAL_SILO};
@@ -123,15 +124,17 @@ impl DataStore {
         dns_update: DnsVersionUpdateBuilder,
     ) -> CreateResult<Silo> {
         let conn = self.pool_connection_authorized(opctx).await?;
-        self.silo_create_conn(
-            &conn,
-            opctx,
-            nexus_opctx,
-            new_silo_params,
-            new_silo_dns_names,
-            dns_update,
-        )
-        .await
+        let silo = self
+            .silo_create_conn(
+                &conn,
+                opctx,
+                nexus_opctx,
+                new_silo_params,
+                new_silo_dns_names,
+                dns_update,
+            )
+            .await?;
+        Ok(silo)
     }
 
     pub async fn silo_create_conn(
@@ -142,7 +145,7 @@ impl DataStore {
         new_silo_params: params::SiloCreate,
         new_silo_dns_names: &[String],
         dns_update: DnsVersionUpdateBuilder,
-    ) -> CreateResult<Silo> {
+    ) -> Result<Silo, TransactionError<Error>> {
         let silo_id = Uuid::new_v4();
         let silo_group_id = Uuid::new_v4();
 
@@ -199,71 +202,71 @@ impl DataStore {
             None
         };
 
-        conn.transaction_async(|conn| async move {
-            let silo = silo_create_query
-                .get_result_async(&conn)
-                .await
-                .map_err(|e| {
-                    public_error_from_diesel(
-                        e,
-                        ErrorHandler::Conflict(
-                            ResourceType::Silo,
-                            new_silo_params.identity.name.as_str(),
-                        ),
-                    )
-                })?;
-            self.virtual_provisioning_collection_create_on_connection(
-                &conn,
-                VirtualProvisioningCollection::new(
-                    silo.id(),
-                    CollectionTypeProvisioned::Silo,
-                ),
-            )
-            .await?;
-
-            if let Some(query) = silo_admin_group_ensure_query {
-                query.get_result_async(&conn).await?;
-            }
-
-            if let Some(queries) = silo_admin_group_role_assignment_queries {
-                let (delete_old_query, insert_new_query) = queries;
-                delete_old_query.execute_async(&conn).await?;
-                insert_new_query.execute_async(&conn).await?;
-            }
-
-            let certificates = new_silo_params
-                .tls_certificates
-                .into_iter()
-                .map(|c| {
-                    Certificate::new(
-                        silo.id(),
-                        Uuid::new_v4(),
-                        ServiceKind::Nexus,
-                        c,
-                        new_silo_dns_names,
-                    )
-                })
-                .collect::<Result<Vec<_>, _>>()
-                .map_err(Error::from)?;
-            {
-                use db::schema::certificate::dsl;
-                diesel::insert_into(dsl::certificate)
-                    .values(certificates)
-                    .execute_async(&conn)
-                    .await?;
-            }
-
-            self.dns_update(nexus_opctx, &conn, dns_update).await?;
+        let silo = conn
+            .transaction_async(|conn| async move {
+                let silo = silo_create_query
+                    .get_result_async(&conn)
+                    .await
+                    .map_err(|e| {
+                        if retryable(&e) {
+                            return TransactionError::Database(e);
+                        }
+                        TransactionError::CustomError(public_error_from_diesel(
+                            e,
+                            ErrorHandler::Conflict(
+                                ResourceType::Silo,
+                                new_silo_params.identity.name.as_str(),
+                            ),
+                        ))
+                    })?;
+                self.virtual_provisioning_collection_create_on_connection(
+                    &conn,
+                    VirtualProvisioningCollection::new(
+                        silo.id(),
+                        CollectionTypeProvisioned::Silo,
+                    ),
+                )
+                .await?;
 
-            Ok(silo)
-        })
-        .await
-        .map_err(|e| match e {
-            TransactionError::CustomError(e) => e,
-            TransactionError::Database(e) => {
-                public_error_from_diesel(e, ErrorHandler::Server)
-            }
-        })
+                if let Some(query) = silo_admin_group_ensure_query {
+                    query.get_result_async(&conn).await?;
+                }
+
+                if let Some(queries) = silo_admin_group_role_assignment_queries
+                {
+                    let (delete_old_query, insert_new_query) = queries;
+                    delete_old_query.execute_async(&conn).await?;
+                    insert_new_query.execute_async(&conn).await?;
+                }
+
+                let certificates = new_silo_params
+                    .tls_certificates
+                    .into_iter()
+                    .map(|c| {
+                        Certificate::new(
+                            silo.id(),
+                            Uuid::new_v4(),
+                            ServiceKind::Nexus,
+                            c,
+                            new_silo_dns_names,
+                        )
+                    })
+                    .collect::<Result<Vec<_>, _>>()
+                    .map_err(Error::from)?;
+                {
+                    use db::schema::certificate::dsl;
+                    diesel::insert_into(dsl::certificate)
+                        .values(certificates)
+                        .execute_async(&conn)
+                        .await?;
+                }
+
+                self.dns_update(nexus_opctx, &conn, dns_update).await?;
+
+                Ok::<Silo, TransactionError<Error>>(silo)
+            })
+            .await?;
+        Ok(silo)
     }
 
     pub async fn silos_list_by_id(
@@ -380,7 +383,7 @@ impl DataStore {
         }
 
         self.virtual_provisioning_collection_delete_on_connection(
-            &conn, id,
+            &opctx.log, &conn, id,
         )
         .await?;
diff --git a/nexus/db-queries/src/db/datastore/silo_group.rs b/nexus/db-queries/src/db/datastore/silo_group.rs
index 46f4aae7c9..29fcb7490b 100644
--- a/nexus/db-queries/src/db/datastore/silo_group.rs
+++ b/nexus/db-queries/src/db/datastore/silo_group.rs
@@ -145,35 +145,39 @@ impl DataStore {
     ) -> UpdateResult<()> {
         opctx.authorize(authz::Action::Modify, authz_silo_user).await?;
 
-        self.pool_connection_authorized(opctx)
-            .await?
-            .transaction_async(|conn| async move {
-                use db::schema::silo_group_membership::dsl;
+        let conn = self.pool_connection_authorized(opctx).await?;
 
-                // Delete existing memberships for user
-                let silo_user_id = authz_silo_user.id();
-                diesel::delete(dsl::silo_group_membership)
-                    .filter(dsl::silo_user_id.eq(silo_user_id))
-                    .execute_async(&conn)
-                    .await?;
+        self.transaction_retry_wrapper("silo_group_membership_replace_for_user")
+            .transaction(&conn, |conn| {
+                let silo_group_ids = silo_group_ids.clone();
+                async move {
+                    use db::schema::silo_group_membership::dsl;
+
+                    // Delete existing memberships for user
+                    let silo_user_id = authz_silo_user.id();
+                    diesel::delete(dsl::silo_group_membership)
+                        .filter(dsl::silo_user_id.eq(silo_user_id))
+                        .execute_async(&conn)
+                        .await?;
 
-                // Create new memberships for user
-                let silo_group_memberships: Vec<
-                    db::model::SiloGroupMembership,
-                > = silo_group_ids
-                    .iter()
-                    .map(|group_id| db::model::SiloGroupMembership {
-                        silo_group_id: *group_id,
-                        silo_user_id,
-                    })
-                    .collect();
+                    // Create new memberships for user
+                    let silo_group_memberships: Vec<
+                        db::model::SiloGroupMembership,
+                    > = silo_group_ids
+                        .iter()
+                        .map(|group_id| db::model::SiloGroupMembership {
+                            silo_group_id: *group_id,
+                            silo_user_id,
+                        })
+                        .collect();
 
-                diesel::insert_into(dsl::silo_group_membership)
-                    .values(silo_group_memberships)
-                    .execute_async(&conn)
-                    .await?;
+                    diesel::insert_into(dsl::silo_group_membership)
+                        .values(silo_group_memberships)
+                        .execute_async(&conn)
+                        .await?;
 
-                Ok(())
+                    Ok(())
+                }
             })
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs
index 406119a636..023384a9bf 100644
--- a/nexus/db-queries/src/db/datastore/sled.rs
+++ b/nexus/db-queries/src/db/datastore/sled.rs
@@ -10,13 +10,12 @@ use crate::context::OpContext;
 use crate::db;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
-use crate::db::error::TransactionError;
 use crate::db::model::Sled;
 use crate::db::model::SledResource;
 use crate::db::model::SledUpdate;
 use crate::db::pagination::paginated;
 use crate::db::update_and_check::UpdateAndCheck;
-use async_bb8_diesel::AsyncConnection;
+use crate::transaction_retry::OptionalError;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
 use diesel::prelude::*;
@@ -90,123 +89,141 @@ impl DataStore {
         enum SledReservationError {
             NotFound,
         }
-        type TxnError = TransactionError<SledReservationError>;
-
-        self.pool_connection_authorized(opctx)
-            .await?
-            .transaction_async(|conn| async move {
-                use db::schema::sled_resource::dsl as resource_dsl;
-                // Check if resource ID already exists - if so, return it.
-                let old_resource = resource_dsl::sled_resource
-                    .filter(resource_dsl::id.eq(resource_id))
-                    .select(SledResource::as_select())
-                    .limit(1)
-                    .load_async(&conn)
-                    .await?;
-
-                if !old_resource.is_empty() {
-                    return Ok(old_resource[0].clone());
-                }
-
-                // If it doesn't already exist, find a sled with enough space
-                // for the resources we're requesting.
-                use db::schema::sled::dsl as sled_dsl;
-                // This answers the boolean question:
-                // "Does the SUM of all hardware thread usage, plus the one we're trying
-                // to allocate, consume less threads than exists on the sled?"
-                let sled_has_space_for_threads =
-                    (diesel::dsl::sql::<diesel::sql_types::BigInt>(&format!(
-                        "COALESCE(SUM(CAST({} as INT8)), 0)",
-                        resource_dsl::hardware_threads::NAME
-                    )) + resources.hardware_threads)
-                        .le(sled_dsl::usable_hardware_threads);
-
-                // This answers the boolean question:
-                // "Does the SUM of all RAM usage, plus the one we're trying
-                // to allocate, consume less RAM than exists on the sled?"
-                let sled_has_space_for_rss =
-                    (diesel::dsl::sql::<diesel::sql_types::BigInt>(&format!(
-                        "COALESCE(SUM(CAST({} as INT8)), 0)",
-                        resource_dsl::rss_ram::NAME
-                    )) + resources.rss_ram)
-                        .le(sled_dsl::usable_physical_ram);
-
-                // Determine whether adding this service's reservoir allocation
-                // to what's allocated on the sled would avoid going over quota.
-                let sled_has_space_in_reservoir =
-                    (diesel::dsl::sql::<diesel::sql_types::BigInt>(&format!(
-                        "COALESCE(SUM(CAST({} as INT8)), 0)",
-                        resource_dsl::reservoir_ram::NAME
-                    )) + resources.reservoir_ram)
-                        .le(sled_dsl::reservoir_size);
-
-                // Generate a query describing all of the sleds that have space
-                // for this reservation.
-                let mut sled_targets = sled_dsl::sled
-                    .left_join(
-                        resource_dsl::sled_resource
-                            .on(resource_dsl::sled_id.eq(sled_dsl::id)),
-                    )
-                    .group_by(sled_dsl::id)
-                    .having(
-                        sled_has_space_for_threads
-                            .and(sled_has_space_for_rss)
-                            .and(sled_has_space_in_reservoir),
-                    )
-                    .filter(sled_dsl::time_deleted.is_null())
-                    // Filter out sleds that are not provisionable.
-                    .filter(
-                        sled_dsl::provision_state
-                            .eq(db::model::SledProvisionState::Provisionable),
-                    )
-                    .select(sled_dsl::id)
-                    .into_boxed();
-
-                // Further constrain the sled IDs according to any caller-
-                // supplied constraints.
-                if let Some(must_select_from) = constraints.must_select_from() {
-                    sled_targets = sled_targets
-                        .filter(sled_dsl::id.eq_any(must_select_from.to_vec()));
-                }
 
-                sql_function!(fn random() -> diesel::sql_types::Float);
-                let sled_targets = sled_targets
-                    .order(random())
-                    .limit(1)
-                    .get_results_async::<Uuid>(&conn)
-                    .await?;
-
-                if sled_targets.is_empty() {
-                    return Err(TxnError::CustomError(
-                        SledReservationError::NotFound,
-                    ));
+        let err = OptionalError::new();
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        self.transaction_retry_wrapper("sled_reservation_create")
+            .transaction(&conn, |conn| {
+                // Clone variables into retryable function
+                let err = err.clone();
+                let constraints = constraints.clone();
+                let resources = resources.clone();
+
+                async move {
+                    use db::schema::sled_resource::dsl as resource_dsl;
+                    // Check if resource ID already exists - if so, return it.
+                    let old_resource = resource_dsl::sled_resource
+                        .filter(resource_dsl::id.eq(resource_id))
+                        .select(SledResource::as_select())
+                        .limit(1)
+                        .load_async(&conn)
+                        .await?;
+
+                    if !old_resource.is_empty() {
+                        return Ok(old_resource[0].clone());
+                    }
+
+                    // If it doesn't already exist, find a sled with enough space
+                    // for the resources we're requesting.
+                    use db::schema::sled::dsl as sled_dsl;
+                    // This answers the boolean question:
+                    // "Does the SUM of all hardware thread usage, plus the one we're trying
+                    // to allocate, consume less threads than exists on the sled?"
+                    let sled_has_space_for_threads =
+                        (diesel::dsl::sql::<diesel::sql_types::BigInt>(
+                            &format!(
+                                "COALESCE(SUM(CAST({} as INT8)), 0)",
+                                resource_dsl::hardware_threads::NAME
+                            ),
+                        ) + resources.hardware_threads)
+                            .le(sled_dsl::usable_hardware_threads);
+
+                    // This answers the boolean question:
+                    // "Does the SUM of all RAM usage, plus the one we're trying
+                    // to allocate, consume less RAM than exists on the sled?"
+                    let sled_has_space_for_rss =
+                        (diesel::dsl::sql::<diesel::sql_types::BigInt>(
+                            &format!(
+                                "COALESCE(SUM(CAST({} as INT8)), 0)",
+                                resource_dsl::rss_ram::NAME
+                            ),
+                        ) + resources.rss_ram)
+                            .le(sled_dsl::usable_physical_ram);
+
+                    // Determine whether adding this service's reservoir allocation
+                    // to what's allocated on the sled would avoid going over quota.
+                    let sled_has_space_in_reservoir =
+                        (diesel::dsl::sql::<diesel::sql_types::BigInt>(
+                            &format!(
+                                "COALESCE(SUM(CAST({} as INT8)), 0)",
+                                resource_dsl::reservoir_ram::NAME
+                            ),
+                        ) + resources.reservoir_ram)
+                            .le(sled_dsl::reservoir_size);
+
+                    // Generate a query describing all of the sleds that have space
+                    // for this reservation.
+                    let mut sled_targets =
+                        sled_dsl::sled
+                            .left_join(
+                                resource_dsl::sled_resource
+                                    .on(resource_dsl::sled_id.eq(sled_dsl::id)),
+                            )
+                            .group_by(sled_dsl::id)
+                            .having(
+                                sled_has_space_for_threads
+                                    .and(sled_has_space_for_rss)
+                                    .and(sled_has_space_in_reservoir),
+                            )
+                            .filter(sled_dsl::time_deleted.is_null())
+                            // Filter out sleds that are not provisionable.
+                            .filter(sled_dsl::provision_state.eq(
+                                db::model::SledProvisionState::Provisionable,
+                            ))
+                            .select(sled_dsl::id)
+                            .into_boxed();
+
+                    // Further constrain the sled IDs according to any caller-
+                    // supplied constraints.
+                    if let Some(must_select_from) =
+                        constraints.must_select_from()
+                    {
+                        sled_targets = sled_targets.filter(
+                            sled_dsl::id.eq_any(must_select_from.to_vec()),
+                        );
+                    }
+
+                    sql_function!(fn random() -> diesel::sql_types::Float);
+                    let sled_targets = sled_targets
+                        .order(random())
+                        .limit(1)
+                        .get_results_async::<Uuid>(&conn)
+                        .await?;
+
+                    if sled_targets.is_empty() {
+                        return Err(err.bail(SledReservationError::NotFound));
+                    }
+
+                    // Create a SledResource record, associate it with the target
+                    // sled.
+                    let resource = SledResource::new(
+                        resource_id,
+                        sled_targets[0],
+                        resource_kind,
+                        resources,
+                    );
+
+                    diesel::insert_into(resource_dsl::sled_resource)
+                        .values(resource)
+                        .returning(SledResource::as_returning())
+                        .get_result_async(&conn)
+                        .await
                 }
-
-                // Create a SledResource record, associate it with the target
-                // sled.
-                let resource = SledResource::new(
-                    resource_id,
-                    sled_targets[0],
-                    resource_kind,
-                    resources,
-                );
-
-                Ok(diesel::insert_into(resource_dsl::sled_resource)
-                    .values(resource)
-                    .returning(SledResource::as_returning())
-                    .get_result_async(&conn)
-                    .await?)
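[Note on the three aggregate checks assembled above: `sled_resource` rows are left-joined and grouped by sled id, so the SUM aggregates existing reservations per sled, COALESCE supplies 0 for sleds with no reservations, and the candidate's requested amount is added before comparing against capacity. Roughly the SQL one HAVING predicate expands to, with column names as in this diff; illustrative only, not the exact output of the query builder:]

    // One of the three generated HAVING predicates, approximately:
    const THREADS_FIT_SQL: &str = "\
        COALESCE(SUM(CAST(hardware_threads AS INT8)), 0) + $1 \
            <= sled.usable_hardware_threads";
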
             })
             .await
-            .map_err(|e| match e {
-                TxnError::CustomError(SledReservationError::NotFound) => {
-                    external::Error::unavail(
-                        "No sleds can fit the requested instance",
-                    )
-                }
-                TxnError::Database(e) => {
-                    public_error_from_diesel(e, ErrorHandler::Server)
+            .map_err(|e| {
+                if let Some(err) = err.take() {
+                    match err {
+                        SledReservationError::NotFound => {
+                            return external::Error::unavail(
+                                "No sleds can fit the requested instance",
+                            );
+                        }
+                    }
                 }
+                public_error_from_diesel(e, ErrorHandler::Server)
             })
     }
 
diff --git a/nexus/db-queries/src/db/datastore/snapshot.rs b/nexus/db-queries/src/db/datastore/snapshot.rs
index 7c03e4bd40..7a9eb8d2bc 100644
--- a/nexus/db-queries/src/db/datastore/snapshot.rs
+++ b/nexus/db-queries/src/db/datastore/snapshot.rs
@@ -20,8 +20,7 @@ use crate::db::model::SnapshotState;
 use crate::db::pagination::paginated;
 use crate::db::update_and_check::UpdateAndCheck;
 use crate::db::update_and_check::UpdateStatus;
-use crate::db::TransactionError;
-use async_bb8_diesel::AsyncConnection;
+use crate::transaction_retry::OptionalError;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
 use diesel::prelude::*;
@@ -48,114 +47,99 @@ impl DataStore {
         let gen = snapshot.gen;
         opctx.authorize(authz::Action::CreateChild, authz_project).await?;
 
-        #[derive(Debug, thiserror::Error)]
-        pub enum CustomError {
-            #[error("Resource already exists")]
-            ResourceAlreadyExists,
-
-            #[error("saw AsyncInsertError")]
-            InsertError(AsyncInsertError),
-        }
-
-        type TxnError = TransactionError<CustomError>;
-
-        let snapshot_name = snapshot.name().to_string();
         let project_id = snapshot.project_id;
 
-        let snapshot: Snapshot = self
-            .pool_connection_authorized(opctx)
-            .await?
-            .transaction_async(|conn| async move {
-                use db::schema::snapshot::dsl;
+        let err = OptionalError::new();
+        let conn = self.pool_connection_authorized(opctx).await?;
 
-                // If an undeleted snapshot exists in the database with the
-                // same name and project but a different id to the snapshot
-                // this function was passed as an argument, then return an
-                // error here.
-                //
-                // As written below,
-                //
-                //   .on_conflict((dsl::project_id, dsl::name))
-                //   .filter_target(dsl::time_deleted.is_null())
-                //   .do_update()
-                //   .set(dsl::time_modified.eq(dsl::time_modified))
-                //
-                // will set any existing record's `time_modified` if the
-                // project id and name match, even if the snapshot ID does
-                // not match. diesel supports adding a filter below like so
-                // (marked with >>):
-                //
-                //   .on_conflict((dsl::project_id, dsl::name))
-                //   .filter_target(dsl::time_deleted.is_null())
-                //   .do_update()
-                //   .set(dsl::time_modified.eq(dsl::time_modified))
-                //   >> .filter(dsl::id.eq(snapshot.id()))
-                //
-                // which will restrict the `insert_into`'s set so that it
-                // only applies if the snapshot ID matches. But,
-                // AsyncInsertError does not have a ObjectAlreadyExists
-                // variant, so this will be returned as CollectionNotFound
-                // due to the `insert_into` failing.
-                //
-                // If this function is passed a snapshot with an ID that
-                // does not match, but a project and name that does, return
-                // ObjectAlreadyExists here.
+        let snapshot: Snapshot = self
+            .transaction_retry_wrapper("project_ensure_snapshot")
+            .transaction(&conn, |conn| {
+                let err = err.clone();
+                let snapshot = snapshot.clone();
+                let snapshot_name = snapshot.name().to_string();
+                async move {
+                    use db::schema::snapshot::dsl;
 
-                let existing_snapshot_id: Option<Uuid> = dsl::snapshot
-                    .filter(dsl::time_deleted.is_null())
-                    .filter(dsl::name.eq(snapshot.name().to_string()))
-                    .filter(dsl::project_id.eq(snapshot.project_id))
-                    .select(dsl::id)
-                    .limit(1)
-                    .first_async(&conn)
-                    .await
-                    .optional()?;
 
+                    // If an undeleted snapshot exists in the database with the
+                    // same name and project but a different id to the snapshot
+                    // this function was passed as an argument, then return an
+                    // error here.
+                    //
+                    // As written below,
+                    //
+                    //   .on_conflict((dsl::project_id, dsl::name))
+                    //   .filter_target(dsl::time_deleted.is_null())
+                    //   .do_update()
+                    //   .set(dsl::time_modified.eq(dsl::time_modified))
+                    //
+                    // will set any existing record's `time_modified` if the
+                    // project id and name match, even if the snapshot ID does
+                    // not match. diesel supports adding a filter below like so
+                    // (marked with >>):
+                    //
+                    //   .on_conflict((dsl::project_id, dsl::name))
+                    //   .filter_target(dsl::time_deleted.is_null())
+                    //   .do_update()
+                    //   .set(dsl::time_modified.eq(dsl::time_modified))
+                    //   >> .filter(dsl::id.eq(snapshot.id()))
+                    //
+                    // which will restrict the `insert_into`'s set so that it
+                    // only applies if the snapshot ID matches. But,
+                    // AsyncInsertError does not have a ObjectAlreadyExists
+                    // variant, so this will be returned as CollectionNotFound
+                    // due to the `insert_into` failing.
+                    //
+                    // If this function is passed a snapshot with an ID that
+                    // does not match, but a project and name that does, return
+                    // ObjectAlreadyExists here.
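[Note on the upsert discussed in the comment above: the `on_conflict((project_id, name)).filter_target(time_deleted.is_null()).do_update()` construction targets the partial unique index on undeleted snapshots and performs a no-op self-assignment so the statement returns the existing row. Approximately the SQL it corresponds to, assuming the column names shown here; illustrative only:]

    // Rough SQL equivalent of the diesel builder in this hunk:
    const UPSERT_SQL: &str = "\
        INSERT INTO snapshot (...) VALUES (...) \
        ON CONFLICT (project_id, name) WHERE time_deleted IS NULL \
        DO UPDATE SET time_modified = snapshot.time_modified";

[Because this fires even when the conflicting row has a different snapshot ID, the explicit pre-check on `existing_snapshot_id` below is what turns that case into ObjectAlreadyExists.]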
-                if let Some(existing_snapshot_id) = existing_snapshot_id {
-                    if existing_snapshot_id != snapshot.id() {
-                        return Err(TransactionError::CustomError(
-                            CustomError::ResourceAlreadyExists,
-                        ));
-                    }
-                }
+                    let existing_snapshot_id: Option<Uuid> = dsl::snapshot
+                        .filter(dsl::time_deleted.is_null())
+                        .filter(dsl::name.eq(snapshot.name().to_string()))
+                        .filter(dsl::project_id.eq(snapshot.project_id))
+                        .select(dsl::id)
+                        .limit(1)
+                        .first_async(&conn)
+                        .await
+                        .optional()?;
 
-                Project::insert_resource(
-                    project_id,
-                    diesel::insert_into(dsl::snapshot)
-                        .values(snapshot)
-                        .on_conflict((dsl::project_id, dsl::name))
-                        .filter_target(dsl::time_deleted.is_null())
-                        .do_update()
-                        .set(dsl::time_modified.eq(dsl::time_modified)),
-                )
-                .insert_and_get_result_async(&conn)
-                .await
-                .map_err(|e| {
-                    TransactionError::CustomError(CustomError::InsertError(e))
-                })
-            })
-            .await
-            .map_err(|e: TxnError| match e {
-                TxnError::CustomError(e) => match e {
-                    CustomError::ResourceAlreadyExists => {
-                        Error::ObjectAlreadyExists {
-                            type_name: ResourceType::Snapshot,
-                            object_name: snapshot_name,
+                    if let Some(existing_snapshot_id) = existing_snapshot_id {
+                        if existing_snapshot_id != snapshot.id() {
+                            return Err(err.bail(Error::ObjectAlreadyExists {
+                                type_name: ResourceType::Snapshot,
+                                object_name: snapshot_name,
+                            }));
                         }
                     }
-                    CustomError::InsertError(e) => match e {
+
+                    Project::insert_resource(
+                        project_id,
+                        diesel::insert_into(dsl::snapshot)
+                            .values(snapshot)
+                            .on_conflict((dsl::project_id, dsl::name))
+                            .filter_target(dsl::time_deleted.is_null())
+                            .do_update()
+                            .set(dsl::time_modified.eq(dsl::time_modified)),
+                    )
+                    .insert_and_get_result_async(&conn)
+                    .await
+                    .map_err(|e| match e {
                         AsyncInsertError::CollectionNotFound => {
-                            Error::ObjectNotFound {
+                            err.bail(Error::ObjectNotFound {
                                 type_name: ResourceType::Project,
                                 lookup_type: LookupType::ById(project_id),
-                            }
-                        }
-                        AsyncInsertError::DatabaseError(e) => {
-                            public_error_from_diesel(e, ErrorHandler::Server)
+                            })
                         }
-                    },
-                },
-                TxnError::Database(e) => {
+                        AsyncInsertError::DatabaseError(e) => e,
+                    })
+                }
+            })
+            .await
+            .map_err(|e| {
+                if let Some(err) = err.take() {
+                    err
+                } else {
                     public_error_from_diesel(e, ErrorHandler::Server)
                 }
             })?;
diff --git a/nexus/db-queries/src/db/datastore/switch_interface.rs b/nexus/db-queries/src/db/datastore/switch_interface.rs
index 88cff50471..67f16fa08f 100644
--- a/nexus/db-queries/src/db/datastore/switch_interface.rs
+++ b/nexus/db-queries/src/db/datastore/switch_interface.rs
@@ -11,11 +11,10 @@ use crate::db::datastore::address_lot::{
 };
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
-use crate::db::error::TransactionError;
 use crate::db::model::LoopbackAddress;
 use crate::db::pagination::paginated;
-use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl};
-use diesel::result::Error as DieselError;
+use crate::transaction_retry::OptionalError;
+use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::{ExpressionMethods, QueryDsl, SelectableHelper};
 use ipnetwork::IpNetwork;
 use nexus_types::external_api::params::LoopbackAddressCreate;
@@ -40,80 +39,78 @@ impl DataStore {
             ReserveBlock(ReserveBlockError),
         }
 
-        type TxnError = TransactionError<LoopbackAddressCreateError>;
-
         let conn = self.pool_connection_authorized(opctx).await?;
 
         let inet = IpNetwork::new(params.address, params.mask)
             .map_err(|_| Error::invalid_request("invalid address"))?;
 
+        let err = OptionalError::new();
+
         // TODO https://github.com/oxidecomputer/omicron/issues/2811
         // Audit external networking database transaction usage
-
conn.transaction_async(|conn| async move { - let lot_id = authz_address_lot.id(); - let (block, rsvd_block) = - crate::db::datastore::address_lot::try_reserve_block( - lot_id, - inet.ip().into(), - params.anycast, - &conn, - ) - .await - .map_err(|e| match e { - ReserveBlockTxnError::CustomError(err) => { - TxnError::CustomError( - LoopbackAddressCreateError::ReserveBlock(err), + self.transaction_retry_wrapper("loopback_address_create") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let lot_id = authz_address_lot.id(); + let (block, rsvd_block) = + crate::db::datastore::address_lot::try_reserve_block( + lot_id, + inet.ip().into(), + params.anycast, + &conn, ) + .await + .map_err(|e| match e { + ReserveBlockTxnError::CustomError(e) => err.bail( + LoopbackAddressCreateError::ReserveBlock(e), + ), + ReserveBlockTxnError::Database(e) => e, + })?; + + // Address block reserved, now create the loopback address. + + let addr = LoopbackAddress::new( + id, + block.id, + rsvd_block.id, + params.rack_id, + params.switch_location.to_string(), + inet, + params.anycast, + ); + + let db_addr: LoopbackAddress = + diesel::insert_into(dsl::loopback_address) + .values(addr) + .returning(LoopbackAddress::as_returning()) + .get_result_async(&conn) + .await?; + + Ok(db_addr) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + LoopbackAddressCreateError::ReserveBlock( + ReserveBlockError::AddressUnavailable, + ) => Error::invalid_request("address unavailable"), + LoopbackAddressCreateError::ReserveBlock( + ReserveBlockError::AddressNotInLot, + ) => Error::invalid_request("address not in lot"), } - ReserveBlockTxnError::Database(err) => { - TxnError::Database(err) - } - })?; - - // Address block reserved, now create the loopback address. 
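[Note on the error mapping just above: the `ReserveBlockTxnError::Database(e) => e` arm hands raw Diesel errors back so the retry wrapper can decide whether to re-run, while `err.bail(..)` records the domain error and rolls back. Later hunks in this patch compress that two-arm shape into `err.bail_retryable_or(..)`. A plausible sketch of that helper, inferred from its call sites and not the actual omicron implementation:]

    // Hypothetical shape of bail_retryable_or: let retryable Diesel
    // errors flow through for the wrapper; record the domain error and
    // roll back otherwise.
    fn bail_retryable_or_sketch<T>(
        err: &OptionalError<T>,
        diesel_error: DieselError,
        custom: T,
    ) -> DieselError {
        if retryable(&diesel_error) {
            diesel_error
        } else {
            err.bail(custom)
        }
    }
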
-
-            let addr = LoopbackAddress::new(
-                id,
-                block.id,
-                rsvd_block.id,
-                params.rack_id,
-                params.switch_location.to_string(),
-                inet,
-                params.anycast,
-            );
-
-            let db_addr: LoopbackAddress =
-                diesel::insert_into(dsl::loopback_address)
-                    .values(addr)
-                    .returning(LoopbackAddress::as_returning())
-                    .get_result_async(&conn)
-                    .await?;
-
-            Ok(db_addr)
-        })
-        .await
-        .map_err(|e| match e {
-            TxnError::CustomError(
-                LoopbackAddressCreateError::ReserveBlock(
-                    ReserveBlockError::AddressUnavailable,
-                ),
-            ) => Error::invalid_request("address unavailable"),
-            TxnError::CustomError(
-                LoopbackAddressCreateError::ReserveBlock(
-                    ReserveBlockError::AddressNotInLot,
-                ),
-            ) => Error::invalid_request("address not in lot"),
-            TxnError::Database(e) => match e {
-                DieselError::DatabaseError(_, _) => public_error_from_diesel(
-                    e,
-                    ErrorHandler::Conflict(
-                        ResourceType::LoopbackAddress,
-                        &format!("lo {}", inet),
-                    ),
-                ),
-                _ => public_error_from_diesel(e, ErrorHandler::Server),
-            },
-        })
+                } else {
+                    public_error_from_diesel(
+                        e,
+                        ErrorHandler::Conflict(
+                            ResourceType::LoopbackAddress,
+                            &format!("lo {}", inet),
+                        ),
+                    )
+                }
+            })
     }
 
     pub async fn loopback_address_delete(
@@ -130,22 +127,23 @@ impl DataStore {
 
         // TODO https://github.com/oxidecomputer/omicron/issues/2811
         // Audit external networking database transaction usage
-        conn.transaction_async(|conn| async move {
-            let la = diesel::delete(dsl::loopback_address)
-                .filter(dsl::id.eq(id))
-                .returning(LoopbackAddress::as_returning())
-                .get_result_async(&conn)
-                .await?;
-
-            diesel::delete(rsvd_block_dsl::address_lot_rsvd_block)
-                .filter(rsvd_block_dsl::id.eq(la.rsvd_address_lot_block_id))
-                .execute_async(&conn)
-                .await?;
-
-            Ok(())
-        })
-        .await
-        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+        self.transaction_retry_wrapper("loopback_address_delete")
+            .transaction(&conn, |conn| async move {
+                let la = diesel::delete(dsl::loopback_address)
+                    .filter(dsl::id.eq(id))
+                    .returning(LoopbackAddress::as_returning())
+                    .get_result_async(&conn)
+                    .await?;
+
+                diesel::delete(rsvd_block_dsl::address_lot_rsvd_block)
+                    .filter(rsvd_block_dsl::id.eq(la.rsvd_address_lot_block_id))
+                    .execute_async(&conn)
+                    .await?;
+
+                Ok(())
+            })
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
     pub async fn loopback_address_get(
diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs
index 6bd4e61f70..221feee23c 100644
--- a/nexus/db-queries/src/db/datastore/switch_port.rs
+++ b/nexus/db-queries/src/db/datastore/switch_port.rs
@@ -11,7 +11,6 @@ use crate::db::datastore::address_lot::{
 use crate::db::datastore::UpdatePrecondition;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
-use crate::db::error::TransactionError;
 use crate::db::model::{
     LldpServiceConfig, Name, SwitchInterfaceConfig, SwitchPort,
     SwitchPortAddressConfig, SwitchPortBgpPeerConfig, SwitchPortConfig,
@@ -20,8 +19,8 @@ use crate::db::model::{
     SwitchVlanInterfaceConfig,
 };
 use crate::db::pagination::paginated;
-use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl};
-use diesel::result::Error as DieselError;
+use crate::transaction_retry::OptionalError;
+use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::{
     CombineDsl, ExpressionMethods, JoinOnDsl, NullableExpressionMethods,
     QueryDsl, SelectableHelper,
@@ -163,283 +162,285 @@ impl DataStore {
             BgpConfigNotFound,
             ReserveBlock(ReserveBlockError),
         }
-        type TxnError = TransactionError<SwitchPortSettingsCreateError>;
        type SpsCreateError = SwitchPortSettingsCreateError;
 
+        let err = OptionalError::new();
         let conn = self.pool_connection_authorized(opctx).await?;
 
         // TODO https://github.com/oxidecomputer/omicron/issues/2811
         // Audit external networking database transaction usage
-        conn.transaction_async(|conn| async move {
-            // create the top level port settings object
-            let port_settings = match id {
-                Some(id) => SwitchPortSettings::with_id(id, &params.identity),
-                None => SwitchPortSettings::new(&params.identity),
-            };
-            //let port_settings = SwitchPortSettings::new(&params.identity);
-            let db_port_settings: SwitchPortSettings =
-                diesel::insert_into(port_settings_dsl::switch_port_settings)
-                    .values(port_settings)
-                    .returning(SwitchPortSettings::as_returning())
-                    .get_result_async(&conn)
-                    .await?;
-
-            let psid = db_port_settings.identity.id;
-
-            // add the port config
-            let port_config = SwitchPortConfig::new(
-                psid,
-                params.port_config.geometry.into(),
-            );
-
-            let db_port_config: SwitchPortConfig =
-                diesel::insert_into(port_config_dsl::switch_port_settings_port_config)
-                    .values(port_config)
-                    .returning(SwitchPortConfig::as_returning())
-                    .get_result_async(&conn)
-                    .await?;
-
-            let mut result = SwitchPortSettingsCombinedResult{
-                settings: db_port_settings,
-                groups: Vec::new(),
-                port: db_port_config,
-                links: Vec::new(),
-                link_lldp: Vec::new(),
-                interfaces: Vec::new(),
-                vlan_interfaces: Vec::new(),
-                routes: Vec::new(),
-                bgp_peers: Vec::new(),
-                addresses: Vec::new(),
-            };
-
-            //TODO validate link configs consistent with port geometry.
-            // - https://github.com/oxidecomputer/omicron/issues/2816
-
-            let mut lldp_config = Vec::with_capacity(params.links.len());
-            let mut link_config = Vec::with_capacity(params.links.len());
-
-            for (link_name, c) in &params.links {
-                let lldp_config_id = match c.lldp.lldp_config {
-                    Some(_) => todo!(), // TODO actual lldp support
-                    None => None,
-                };
-                let lldp_svc_config =
-                    LldpServiceConfig::new(c.lldp.enabled, lldp_config_id);
-
-                lldp_config.push(lldp_svc_config.clone());
-                link_config.push(SwitchPortLinkConfig::new(
-                    psid,
-                    lldp_svc_config.id,
-                    link_name.clone(),
-                    c.mtu,
-                    c.fec.into(),
-                    c.speed.into(),
-                    c.autoneg,
-                ));
-            }
-            result.link_lldp =
-                diesel::insert_into(lldp_config_dsl::lldp_service_config)
-                    .values(lldp_config.clone())
-                    .returning(LldpServiceConfig::as_returning())
-                    .get_results_async(&conn)
-                    .await?;
-            result.links =
-                diesel::insert_into(
-                    link_config_dsl::switch_port_settings_link_config)
-                    .values(link_config)
-                    .returning(SwitchPortLinkConfig::as_returning())
-                    .get_results_async(&conn)
-                    .await?;
-
-            let mut interface_config = Vec::with_capacity(params.interfaces.len());
-            let mut vlan_interface_config = Vec::new();
-            for (interface_name, i) in &params.interfaces {
-                let ifx_config = SwitchInterfaceConfig::new(
-                    psid,
-                    interface_name.clone(),
-                    i.v6_enabled,
-                    i.kind.into(),
-                );
-                interface_config.push(ifx_config.clone());
-                if let params::SwitchInterfaceKind::Vlan(vlan_if) = i.kind {
-                    vlan_interface_config.push(SwitchVlanInterfaceConfig::new(
-                        ifx_config.id,
-                        vlan_if.vid,
-                    ));
-                }
-            }
-            result.interfaces =
-                diesel::insert_into(
-                    interface_config_dsl::switch_port_settings_interface_config)
-                    .values(interface_config)
-                    .returning(SwitchInterfaceConfig::as_returning())
-                    .get_results_async(&conn)
-                    .await?;
-            result.vlan_interfaces =
-                diesel::insert_into(vlan_config_dsl::switch_vlan_interface_config)
-                    .values(vlan_interface_config)
-                    .returning(SwitchVlanInterfaceConfig::as_returning())
-                    .get_results_async(&conn)
-                    .await?;
-
-            let mut route_config = Vec::with_capacity(params.routes.len());
-
-            for (interface_name, r) in &params.routes {
-                for route in &r.routes {
-                    route_config.push(SwitchPortRouteConfig::new(
-                        psid,
-                        interface_name.clone(),
-                        route.dst.into(),
-                        route.gw.into(),
-                        route.vid.map(Into::into),
-                    ));
-                }
-            }
-            result.routes =
-                diesel::insert_into(
-                    route_config_dsl::switch_port_settings_route_config)
-                    .values(route_config)
-                    .returning(SwitchPortRouteConfig::as_returning())
-                    .get_results_async(&conn)
-                    .await?;
-
-            let mut bgp_peer_config = Vec::new();
-            for (interface_name, peer_config) in &params.bgp_peers {
-                for p in &peer_config.peers {
-                    use db::schema::bgp_config;
-                    let bgp_config_id = match &p.bgp_config {
-                        NameOrId::Id(id) => *id,
-                        NameOrId::Name(name) => {
-                            let name = name.to_string();
-                            bgp_config_dsl::bgp_config
-                                .filter(bgp_config::time_deleted.is_null())
-                                .filter(bgp_config::name.eq(name))
-                                .select(bgp_config::id)
-                                .limit(1)
-                                .first_async::<Uuid>(&conn)
-                                .await
-                                .map_err(|_|
-                                    TxnError::CustomError(
-                                        SwitchPortSettingsCreateError::BgpConfigNotFound,
-                                    )
-                                )?
-                        }
-                    };
-
-                    bgp_peer_config.push(SwitchPortBgpPeerConfig::new(
-                        psid,
-                        bgp_config_id,
-                        interface_name.clone(),
-                        p.addr.into(),
-                        p.hold_time.into(),
-                        p.idle_hold_time.into(),
-                        p.delay_open.into(),
-                        p.connect_retry.into(),
-                        p.keepalive.into(),
-                    ));
-                }
-            }
+        self.transaction_retry_wrapper("switch_port_settings_create")
+            .transaction(&conn, |conn| {
+                let err = err.clone();
+                async move {
+                    // create the top level port settings object
+                    let port_settings = match id {
+                        Some(id) => SwitchPortSettings::with_id(id, &params.identity),
+                        None => SwitchPortSettings::new(&params.identity),
+                    };
+                    //let port_settings = SwitchPortSettings::new(&params.identity);
+                    let db_port_settings: SwitchPortSettings =
+                        diesel::insert_into(port_settings_dsl::switch_port_settings)
+                            .values(port_settings)
+                            .returning(SwitchPortSettings::as_returning())
+                            .get_result_async(&conn)
+                            .await?;
+
+                    let psid = db_port_settings.identity.id;
+
+                    // add the port config
+                    let port_config = SwitchPortConfig::new(
+                        psid,
+                        params.port_config.geometry.into(),
+                    );
+
+                    let db_port_config: SwitchPortConfig =
+                        diesel::insert_into(port_config_dsl::switch_port_settings_port_config)
+                            .values(port_config)
+                            .returning(SwitchPortConfig::as_returning())
+                            .get_result_async(&conn)
+                            .await?;
+
+                    let mut result = SwitchPortSettingsCombinedResult{
+                        settings: db_port_settings,
+                        groups: Vec::new(),
+                        port: db_port_config,
+                        links: Vec::new(),
+                        link_lldp: Vec::new(),
+                        interfaces: Vec::new(),
+                        vlan_interfaces: Vec::new(),
+                        routes: Vec::new(),
+                        bgp_peers: Vec::new(),
+                        addresses: Vec::new(),
+                    };
+
+                    //TODO validate link configs consistent with port geometry.
+                    // - https://github.com/oxidecomputer/omicron/issues/2816
+
+                    let mut lldp_config = Vec::with_capacity(params.links.len());
+                    let mut link_config = Vec::with_capacity(params.links.len());
+
+                    for (link_name, c) in &params.links {
+                        let lldp_config_id = match c.lldp.lldp_config {
+                            Some(_) => todo!(), // TODO actual lldp support
+                            None => None,
+                        };
+                        let lldp_svc_config =
+                            LldpServiceConfig::new(c.lldp.enabled, lldp_config_id);
+
+                        lldp_config.push(lldp_svc_config.clone());
+                        link_config.push(SwitchPortLinkConfig::new(
+                            psid,
+                            lldp_svc_config.id,
+                            link_name.clone(),
+                            c.mtu,
+                            c.fec.into(),
+                            c.speed.into(),
+                            c.autoneg,
+                        ));
+                    }
+                    result.link_lldp =
+                        diesel::insert_into(lldp_config_dsl::lldp_service_config)
+                            .values(lldp_config.clone())
+                            .returning(LldpServiceConfig::as_returning())
+                            .get_results_async(&conn)
+                            .await?;
+                    result.links =
+                        diesel::insert_into(
+                            link_config_dsl::switch_port_settings_link_config)
+                            .values(link_config)
+                            .returning(SwitchPortLinkConfig::as_returning())
+                            .get_results_async(&conn)
+                            .await?;
+
+                    let mut interface_config = Vec::with_capacity(params.interfaces.len());
+                    let mut vlan_interface_config = Vec::new();
+                    for (interface_name, i) in &params.interfaces {
+                        let ifx_config = SwitchInterfaceConfig::new(
+                            psid,
+                            interface_name.clone(),
+                            i.v6_enabled,
+                            i.kind.into(),
+                        );
+                        interface_config.push(ifx_config.clone());
+                        if let params::SwitchInterfaceKind::Vlan(vlan_if) = i.kind {
+                            vlan_interface_config.push(SwitchVlanInterfaceConfig::new(
+                                ifx_config.id,
+                                vlan_if.vid,
+                            ));
+                        }
+                    }
+                    result.interfaces =
+                        diesel::insert_into(
+                            interface_config_dsl::switch_port_settings_interface_config)
+                            .values(interface_config)
+                            .returning(SwitchInterfaceConfig::as_returning())
+                            .get_results_async(&conn)
+                            .await?;
+                    result.vlan_interfaces =
+                        diesel::insert_into(vlan_config_dsl::switch_vlan_interface_config)
+                            .values(vlan_interface_config)
+                            .returning(SwitchVlanInterfaceConfig::as_returning())
+                            .get_results_async(&conn)
+                            .await?;
+
+                    let mut route_config = Vec::with_capacity(params.routes.len());
+
+                    for (interface_name, r) in &params.routes {
+                        for route in &r.routes {
+                            route_config.push(SwitchPortRouteConfig::new(
+                                psid,
+                                interface_name.clone(),
+                                route.dst.into(),
+                                route.gw.into(),
+                                route.vid.map(Into::into),
+                            ));
+                        }
+                    }
+                    result.routes =
+                        diesel::insert_into(
+                            route_config_dsl::switch_port_settings_route_config)
+                            .values(route_config)
+                            .returning(SwitchPortRouteConfig::as_returning())
+                            .get_results_async(&conn)
+                            .await?;
+
+                    let mut bgp_peer_config = Vec::new();
+                    for (interface_name, peer_config) in &params.bgp_peers {
+                        for p in &peer_config.peers {
+                            use db::schema::bgp_config;
+                            let bgp_config_id = match &p.bgp_config {
+                                NameOrId::Id(id) => *id,
+                                NameOrId::Name(name) => {
+                                    let name = name.to_string();
+                                    bgp_config_dsl::bgp_config
+                                        .filter(bgp_config::time_deleted.is_null())
+                                        .filter(bgp_config::name.eq(name))
+                                        .select(bgp_config::id)
+                                        .limit(1)
+                                        .first_async::<Uuid>(&conn)
+                                        .await
+                                        .map_err(|diesel_error| {
+                                            err.bail_retryable_or(
+                                                diesel_error,
+                                                SwitchPortSettingsCreateError::BgpConfigNotFound
+                                            )
+                                        })?
+                                }
+                            };
+
+                            bgp_peer_config.push(SwitchPortBgpPeerConfig::new(
+                                psid,
+                                bgp_config_id,
+                                interface_name.clone(),
+                                p.addr.into(),
+                                p.hold_time.into(),
+                                p.idle_hold_time.into(),
+                                p.delay_open.into(),
+                                p.connect_retry.into(),
+                                p.keepalive.into(),
+                            ));
 
-            let mut address_config = Vec::new();
-            use db::schema::address_lot;
-            for (interface_name, a) in &params.addresses {
-                for address in &a.addresses {
-                    let address_lot_id = match &address.address_lot {
-                        NameOrId::Id(id) => *id,
-                        NameOrId::Name(name) => {
-                            let name = name.to_string();
-                            address_lot_dsl::address_lot
-                                .filter(address_lot::time_deleted.is_null())
-                                .filter(address_lot::name.eq(name))
-                                .select(address_lot::id)
-                                .limit(1)
-                                .first_async::<Uuid>(&conn)
-                                .await
-                                .map_err(|_|
-                                    TxnError::CustomError(
-                                        SwitchPortSettingsCreateError::AddressLotNotFound,
-                                    )
-                                )?
                         }
-                    };
+                    }
+                    result.bgp_peers =
+                        diesel::insert_into(
+                            bgp_peer_dsl::switch_port_settings_bgp_peer_config)
+                            .values(bgp_peer_config)
+                            .returning(SwitchPortBgpPeerConfig::as_returning())
+                            .get_results_async(&conn)
+                            .await?;
+
+                    let mut address_config = Vec::new();
+                    use db::schema::address_lot;
+                    for (interface_name, a) in &params.addresses {
+                        for address in &a.addresses {
+                            let address_lot_id = match &address.address_lot {
+                                NameOrId::Id(id) => *id,
+                                NameOrId::Name(name) => {
+                                    let name = name.to_string();
+                                    address_lot_dsl::address_lot
+                                        .filter(address_lot::time_deleted.is_null())
+                                        .filter(address_lot::name.eq(name))
+                                        .select(address_lot::id)
+                                        .limit(1)
+                                        .first_async::<Uuid>(&conn)
+                                        .await
+                                        .map_err(|diesel_error| {
+                                            err.bail_retryable_or(
+                                                diesel_error,
+                                                SwitchPortSettingsCreateError::AddressLotNotFound
+                                            )
+                                        })?
+                                }
+                            };
+                            // TODO: Reduce DB round trips needed for reserving ip blocks
+                            // https://github.com/oxidecomputer/omicron/issues/3060
+                            let (block, rsvd_block) =
+                                crate::db::datastore::address_lot::try_reserve_block(
+                                    address_lot_id,
+                                    address.address.ip().into(),
+                                    // TODO: Should we allow anycast addresses for switch_ports?
+ // anycast + false, + &conn ) - } - ReserveBlockTxnError::Database(err) => TxnError::Database(err), - })?; - - address_config.push(SwitchPortAddressConfig::new( - psid, - block.id, - rsvd_block.id, - address.address.into(), - interface_name.clone(), - )); + .await + .map_err(|e| match e { + ReserveBlockTxnError::CustomError(e) => { + err.bail(SwitchPortSettingsCreateError::ReserveBlock(e)) + } + ReserveBlockTxnError::Database(e) => e, + })?; + + address_config.push(SwitchPortAddressConfig::new( + psid, + block.id, + rsvd_block.id, + address.address.into(), + interface_name.clone(), + )); + } + } + result.addresses = + diesel::insert_into( + address_config_dsl::switch_port_settings_address_config) + .values(address_config) + .returning(SwitchPortAddressConfig::as_returning()) + .get_results_async(&conn) + .await?; + + Ok(result) } } - result.addresses = - diesel::insert_into( - address_config_dsl::switch_port_settings_address_config) - .values(address_config) - .returning(SwitchPortAddressConfig::as_returning()) - .get_results_async(&conn) - .await?; - - Ok(result) - }) + ) .await - .map_err(|e| match e { - TxnError::CustomError(SpsCreateError::AddressLotNotFound) => { - Error::invalid_request("AddressLot not found") - } - TxnError::CustomError(SpsCreateError::BgpConfigNotFound) => { - Error::invalid_request("BGP config not found") - } - TxnError::CustomError( - SwitchPortSettingsCreateError::ReserveBlock( - ReserveBlockError::AddressUnavailable - ) - ) => Error::invalid_request("address unavailable"), - TxnError::CustomError( - SwitchPortSettingsCreateError::ReserveBlock( - ReserveBlockError::AddressNotInLot - ) - ) => Error::invalid_request("address not in lot"), - TxnError::Database(e) => match e { - DieselError::DatabaseError(_, _) => public_error_from_diesel( + .map_err(|e| { + if let Some(err) = err.take() { + match err { + SpsCreateError::AddressLotNotFound => { + Error::invalid_request("AddressLot not found") + } + SpsCreateError::BgpConfigNotFound => { + Error::invalid_request("BGP config not found") + } + SwitchPortSettingsCreateError::ReserveBlock( + ReserveBlockError::AddressUnavailable + ) => Error::invalid_request("address unavailable"), + SwitchPortSettingsCreateError::ReserveBlock( + ReserveBlockError::AddressNotInLot + ) => Error::invalid_request("address not in lot"), + } + } else { + public_error_from_diesel( e, ErrorHandler::Conflict( ResourceType::SwitchPortSettings, params.identity.name.as_str(), ), - ), - _ => public_error_from_diesel(e, ErrorHandler::Server), - }, + ) + } }) } @@ -454,7 +455,6 @@ impl DataStore { enum SwitchPortSettingsDeleteError { SwitchPortSettingsNotFound, } - type TxnError = TransactionError; let conn = self.pool_connection_authorized(opctx).await?; @@ -463,173 +463,178 @@ impl DataStore { Some(name_or_id) => name_or_id, }; + let err = OptionalError::new(); + // TODO https://github.com/oxidecomputer/omicron/issues/2811 // Audit external networking database transaction usage - conn.transaction_async(|conn| async move { - - use db::schema::switch_port_settings; - let id = match selector { - NameOrId::Id(id) => *id, - NameOrId::Name(name) => { - let name = name.to_string(); - port_settings_dsl::switch_port_settings - .filter(switch_port_settings::time_deleted.is_null()) - .filter(switch_port_settings::name.eq(name)) - .select(switch_port_settings::id) - .limit(1) - .first_async::(&conn) - .await - .map_err(|_| - TxnError::CustomError( - SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound, - ) - )? 
- } - }; + self.transaction_retry_wrapper("switch_port_settings_delete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + use db::schema::switch_port_settings; + let id = match selector { + NameOrId::Id(id) => *id, + NameOrId::Name(name) => { + let name = name.to_string(); + port_settings_dsl::switch_port_settings + .filter(switch_port_settings::time_deleted.is_null()) + .filter(switch_port_settings::name.eq(name)) + .select(switch_port_settings::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound + ) + })? + } + }; - // delete the top level port settings object - diesel::delete(port_settings_dsl::switch_port_settings) - .filter(switch_port_settings::id.eq(id)) - .execute_async(&conn) - .await?; + // delete the top level port settings object + diesel::delete(port_settings_dsl::switch_port_settings) + .filter(switch_port_settings::id.eq(id)) + .execute_async(&conn) + .await?; - // delete the port config object - use db::schema::switch_port_settings_port_config::{ - self as sps_port_config, dsl as port_config_dsl, - }; - diesel::delete(port_config_dsl::switch_port_settings_port_config) - .filter(sps_port_config::port_settings_id.eq(id)) - .execute_async(&conn) - .await?; + // delete the port config object + use db::schema::switch_port_settings_port_config::{ + self as sps_port_config, dsl as port_config_dsl, + }; + diesel::delete(port_config_dsl::switch_port_settings_port_config) + .filter(sps_port_config::port_settings_id.eq(id)) + .execute_async(&conn) + .await?; - // delete the link configs - use db::schema::switch_port_settings_link_config::{ - self as sps_link_config, dsl as link_config_dsl, - }; - let links: Vec = - diesel::delete( - link_config_dsl::switch_port_settings_link_config - ) - .filter( - sps_link_config::port_settings_id.eq(id) - ) - .returning(SwitchPortLinkConfig::as_returning()) - .get_results_async(&conn) - .await?; + // delete the link configs + use db::schema::switch_port_settings_link_config::{ + self as sps_link_config, dsl as link_config_dsl, + }; + let links: Vec = + diesel::delete( + link_config_dsl::switch_port_settings_link_config + ) + .filter( + sps_link_config::port_settings_id.eq(id) + ) + .returning(SwitchPortLinkConfig::as_returning()) + .get_results_async(&conn) + .await?; - // delete lldp configs - use db::schema::lldp_service_config::{self, dsl as lldp_config_dsl}; - let lldp_svc_ids: Vec = links - .iter() - .map(|link| link.lldp_service_config_id) - .collect(); - diesel::delete(lldp_config_dsl::lldp_service_config) - .filter(lldp_service_config::id.eq_any(lldp_svc_ids)) - .execute_async(&conn) - .await?; + // delete lldp configs + use db::schema::lldp_service_config::{self, dsl as lldp_config_dsl}; + let lldp_svc_ids: Vec = links + .iter() + .map(|link| link.lldp_service_config_id) + .collect(); + diesel::delete(lldp_config_dsl::lldp_service_config) + .filter(lldp_service_config::id.eq_any(lldp_svc_ids)) + .execute_async(&conn) + .await?; - // delete interface configs - use db::schema::switch_port_settings_interface_config::{ - self as sps_interface_config, dsl as interface_config_dsl, - }; + // delete interface configs + use db::schema::switch_port_settings_interface_config::{ + self as sps_interface_config, dsl as interface_config_dsl, + }; - let interfaces: Vec = - diesel::delete( - interface_config_dsl::switch_port_settings_interface_config - ) - .filter( - 
sps_interface_config::port_settings_id.eq(id) - ) - .returning(SwitchInterfaceConfig::as_returning()) - .get_results_async(&conn) - .await?; + let interfaces: Vec = + diesel::delete( + interface_config_dsl::switch_port_settings_interface_config + ) + .filter( + sps_interface_config::port_settings_id.eq(id) + ) + .returning(SwitchInterfaceConfig::as_returning()) + .get_results_async(&conn) + .await?; - // delete any vlan interfaces - use db::schema::switch_vlan_interface_config::{ - self, dsl as vlan_config_dsl, - }; - let interface_ids: Vec = interfaces - .iter() - .map(|interface| interface.id) - .collect(); - - diesel::delete(vlan_config_dsl::switch_vlan_interface_config) - .filter( - switch_vlan_interface_config::interface_config_id.eq_any( - interface_ids + // delete any vlan interfaces + use db::schema::switch_vlan_interface_config::{ + self, dsl as vlan_config_dsl, + }; + let interface_ids: Vec = interfaces + .iter() + .map(|interface| interface.id) + .collect(); + + diesel::delete(vlan_config_dsl::switch_vlan_interface_config) + .filter( + switch_vlan_interface_config::interface_config_id.eq_any( + interface_ids + ) ) + .execute_async(&conn) + .await?; + + // delete route configs + use db::schema::switch_port_settings_route_config; + use db::schema::switch_port_settings_route_config::dsl + as route_config_dsl; + + diesel::delete( + route_config_dsl::switch_port_settings_route_config ) + .filter(switch_port_settings_route_config::port_settings_id.eq(id)) .execute_async(&conn) .await?; - // delete route configs - use db::schema::switch_port_settings_route_config; - use db::schema::switch_port_settings_route_config::dsl - as route_config_dsl; + // delete bgp configurations + use db::schema::switch_port_settings_bgp_peer_config as bgp_peer; + use db::schema::switch_port_settings_bgp_peer_config::dsl + as bgp_peer_dsl; - diesel::delete( - route_config_dsl::switch_port_settings_route_config - ) - .filter(switch_port_settings_route_config::port_settings_id.eq(id)) - .execute_async(&conn) - .await?; + diesel::delete(bgp_peer_dsl::switch_port_settings_bgp_peer_config) + .filter(bgp_peer::port_settings_id.eq(id)) + .execute_async(&conn) + .await?; - // delete bgp configurations - use db::schema::switch_port_settings_bgp_peer_config as bgp_peer; - use db::schema::switch_port_settings_bgp_peer_config::dsl - as bgp_peer_dsl; + // delete address configs + use db::schema::switch_port_settings_address_config::{ + self as address_config, dsl as address_config_dsl, + }; - diesel::delete(bgp_peer_dsl::switch_port_settings_bgp_peer_config) - .filter(bgp_peer::port_settings_id.eq(id)) - .execute_async(&conn) + let port_settings_addrs = diesel::delete( + address_config_dsl::switch_port_settings_address_config, + ) + .filter(address_config::port_settings_id.eq(id)) + .returning(SwitchPortAddressConfig::as_returning()) + .get_results_async(&conn) .await?; - // delete address configs - use db::schema::switch_port_settings_address_config::{ - self as address_config, dsl as address_config_dsl, - }; - - let port_settings_addrs = diesel::delete( - address_config_dsl::switch_port_settings_address_config, - ) - .filter(address_config::port_settings_id.eq(id)) - .returning(SwitchPortAddressConfig::as_returning()) - .get_results_async(&conn) - .await?; + use db::schema::address_lot_rsvd_block::dsl as rsvd_block_dsl; - use db::schema::address_lot_rsvd_block::dsl as rsvd_block_dsl; + for ps in &port_settings_addrs { + diesel::delete(rsvd_block_dsl::address_lot_rsvd_block) + 
.filter(rsvd_block_dsl::id.eq(ps.rsvd_address_lot_block_id)) + .execute_async(&conn) + .await?; + } - for ps in &port_settings_addrs { - diesel::delete(rsvd_block_dsl::address_lot_rsvd_block) - .filter(rsvd_block_dsl::id.eq(ps.rsvd_address_lot_block_id)) - .execute_async(&conn) - .await?; + Ok(()) } - - Ok(()) }) .await - .map_err(|e| match e { - TxnError::CustomError( - SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound) => { - Error::invalid_request("port settings not found") + .map_err(|e| { + if let Some(err) = err.take() { + match err { + SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound => { + Error::invalid_request("port settings not found") + } + } + } else { + let name = match ¶ms.port_settings { + Some(name_or_id) => name_or_id.to_string(), + None => String::new(), + }; + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::SwitchPortSettings, + &name, + ), + ) } - TxnError::Database(e) => match e { - DieselError::DatabaseError(_, _) => { - let name = match ¶ms.port_settings { - Some(name_or_id) => name_or_id.to_string(), - None => String::new(), - }; - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::SwitchPortSettings, - &name, - ), - ) - }, - _ => public_error_from_diesel(e, ErrorHandler::Server), - }, }) } @@ -666,174 +671,178 @@ impl DataStore { enum SwitchPortSettingsGetError { NotFound(external::Name), } - type TxnError = TransactionError; + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; // TODO https://github.com/oxidecomputer/omicron/issues/2811 // Audit external networking database transaction usage - conn.transaction_async(|conn| async move { - // get the top level port settings object - use db::schema::switch_port_settings::{ - self, dsl as port_settings_dsl, - }; - - let id = match name_or_id { - NameOrId::Id(id) => *id, - NameOrId::Name(name) => { - let name_str = name.to_string(); + self.transaction_retry_wrapper("switch_port_settings_get") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + // get the top level port settings object + use db::schema::switch_port_settings::{ + self, dsl as port_settings_dsl, + }; + + let id = match name_or_id { + NameOrId::Id(id) => *id, + NameOrId::Name(name) => { + let name_str = name.to_string(); + port_settings_dsl::switch_port_settings + .filter(switch_port_settings::time_deleted.is_null()) + .filter(switch_port_settings::name.eq(name_str)) + .select(switch_port_settings::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or_else(diesel_error, |_| { + SwitchPortSettingsGetError::NotFound( + name.clone(), + ) + }) + })? + } + }; + + let settings: SwitchPortSettings = port_settings_dsl::switch_port_settings .filter(switch_port_settings::time_deleted.is_null()) - .filter(switch_port_settings::name.eq(name_str)) - .select(switch_port_settings::id) + .filter(switch_port_settings::id.eq(id)) + .select(SwitchPortSettings::as_select()) .limit(1) - .first_async::(&conn) - .await - .map_err(|_| { - TxnError::CustomError( - SwitchPortSettingsGetError::NotFound( - name.clone(), - ), - ) - })? 
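Every datastore method converted in this patch follows the same outline, distilled here to its skeleton (`my_op`, `MyError`, and `do_queries` are placeholders, not real nexus items). Inside a `DataStore` method:

    let err = OptionalError::new();
    let conn = self.pool_connection_authorized(opctx).await?;

    self.transaction_retry_wrapper("my_op")
        .transaction(&conn, |conn| {
            // The closure may run several times on serialization failures,
            // so captured state is cloned per attempt rather than moved.
            let err = err.clone();
            async move {
                do_queries(&conn)
                    .await
                    .map_err(|e| err.bail_retryable_or(e, MyError::NotFound))
            }
        })
        .await
        .map_err(|e| match err.take() {
            // A domain error recorded via `bail*` takes precedence over
            // the rollback error it was smuggled out through.
            Some(MyError::NotFound) => Error::invalid_request("not found"),
            None => public_error_from_diesel(e, ErrorHandler::Server),
        })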
- } - }; - - let settings: SwitchPortSettings = - port_settings_dsl::switch_port_settings - .filter(switch_port_settings::time_deleted.is_null()) - .filter(switch_port_settings::id.eq(id)) - .select(SwitchPortSettings::as_select()) - .limit(1) - .first_async::(&conn) - .await?; + .first_async::(&conn) + .await?; - // get the port config - use db::schema::switch_port_settings_port_config::{ - self as port_config, dsl as port_config_dsl, - }; - let port: SwitchPortConfig = - port_config_dsl::switch_port_settings_port_config - .filter(port_config::port_settings_id.eq(id)) - .select(SwitchPortConfig::as_select()) - .limit(1) - .first_async::(&conn) - .await?; + // get the port config + use db::schema::switch_port_settings_port_config::{ + self as port_config, dsl as port_config_dsl, + }; + let port: SwitchPortConfig = + port_config_dsl::switch_port_settings_port_config + .filter(port_config::port_settings_id.eq(id)) + .select(SwitchPortConfig::as_select()) + .limit(1) + .first_async::(&conn) + .await?; - // initialize result - let mut result = - SwitchPortSettingsCombinedResult::new(settings, port); + // initialize result + let mut result = + SwitchPortSettingsCombinedResult::new(settings, port); - // get the link configs - use db::schema::switch_port_settings_link_config::{ - self as link_config, dsl as link_config_dsl, - }; + // get the link configs + use db::schema::switch_port_settings_link_config::{ + self as link_config, dsl as link_config_dsl, + }; - result.links = link_config_dsl::switch_port_settings_link_config - .filter(link_config::port_settings_id.eq(id)) - .select(SwitchPortLinkConfig::as_select()) - .load_async::(&conn) - .await?; + result.links = link_config_dsl::switch_port_settings_link_config + .filter(link_config::port_settings_id.eq(id)) + .select(SwitchPortLinkConfig::as_select()) + .load_async::(&conn) + .await?; - let lldp_svc_ids: Vec = result - .links - .iter() - .map(|link| link.lldp_service_config_id) - .collect(); - - use db::schema::lldp_service_config as lldp_config; - use db::schema::lldp_service_config::dsl as lldp_dsl; - result.link_lldp = lldp_dsl::lldp_service_config - .filter(lldp_config::id.eq_any(lldp_svc_ids)) - .select(LldpServiceConfig::as_select()) - .limit(1) - .load_async::(&conn) - .await?; + let lldp_svc_ids: Vec = result + .links + .iter() + .map(|link| link.lldp_service_config_id) + .collect(); + + use db::schema::lldp_service_config as lldp_config; + use db::schema::lldp_service_config::dsl as lldp_dsl; + result.link_lldp = lldp_dsl::lldp_service_config + .filter(lldp_config::id.eq_any(lldp_svc_ids)) + .select(LldpServiceConfig::as_select()) + .limit(1) + .load_async::(&conn) + .await?; - // get the interface configs - use db::schema::switch_port_settings_interface_config::{ - self as interface_config, dsl as interface_config_dsl, - }; + // get the interface configs + use db::schema::switch_port_settings_interface_config::{ + self as interface_config, dsl as interface_config_dsl, + }; - result.interfaces = - interface_config_dsl::switch_port_settings_interface_config - .filter(interface_config::port_settings_id.eq(id)) - .select(SwitchInterfaceConfig::as_select()) - .load_async::(&conn) + result.interfaces = + interface_config_dsl::switch_port_settings_interface_config + .filter(interface_config::port_settings_id.eq(id)) + .select(SwitchInterfaceConfig::as_select()) + .load_async::(&conn) + .await?; + + use db::schema::switch_vlan_interface_config as vlan_config; + use db::schema::switch_vlan_interface_config::dsl as vlan_dsl; + let 
interface_ids: Vec = result + .interfaces + .iter() + .map(|interface| interface.id) + .collect(); + + result.vlan_interfaces = vlan_dsl::switch_vlan_interface_config + .filter(vlan_config::interface_config_id.eq_any(interface_ids)) + .select(SwitchVlanInterfaceConfig::as_select()) + .load_async::(&conn) .await?; - use db::schema::switch_vlan_interface_config as vlan_config; - use db::schema::switch_vlan_interface_config::dsl as vlan_dsl; - let interface_ids: Vec = result - .interfaces - .iter() - .map(|interface| interface.id) - .collect(); - - result.vlan_interfaces = vlan_dsl::switch_vlan_interface_config - .filter(vlan_config::interface_config_id.eq_any(interface_ids)) - .select(SwitchVlanInterfaceConfig::as_select()) - .load_async::(&conn) - .await?; - - // get the route configs - use db::schema::switch_port_settings_route_config::{ - self as route_config, dsl as route_config_dsl, - }; + // get the route configs + use db::schema::switch_port_settings_route_config::{ + self as route_config, dsl as route_config_dsl, + }; - result.routes = route_config_dsl::switch_port_settings_route_config - .filter(route_config::port_settings_id.eq(id)) - .select(SwitchPortRouteConfig::as_select()) - .load_async::(&conn) - .await?; + result.routes = route_config_dsl::switch_port_settings_route_config + .filter(route_config::port_settings_id.eq(id)) + .select(SwitchPortRouteConfig::as_select()) + .load_async::(&conn) + .await?; - // get the bgp peer configs - use db::schema::switch_port_settings_bgp_peer_config::{ - self as bgp_peer, dsl as bgp_peer_dsl, - }; + // get the bgp peer configs + use db::schema::switch_port_settings_bgp_peer_config::{ + self as bgp_peer, dsl as bgp_peer_dsl, + }; - result.bgp_peers = - bgp_peer_dsl::switch_port_settings_bgp_peer_config - .filter(bgp_peer::port_settings_id.eq(id)) - .select(SwitchPortBgpPeerConfig::as_select()) - .load_async::(&conn) - .await?; + result.bgp_peers = + bgp_peer_dsl::switch_port_settings_bgp_peer_config + .filter(bgp_peer::port_settings_id.eq(id)) + .select(SwitchPortBgpPeerConfig::as_select()) + .load_async::(&conn) + .await?; - // get the address configs - use db::schema::switch_port_settings_address_config::{ - self as address_config, dsl as address_config_dsl, - }; + // get the address configs + use db::schema::switch_port_settings_address_config::{ + self as address_config, dsl as address_config_dsl, + }; - result.addresses = - address_config_dsl::switch_port_settings_address_config - .filter(address_config::port_settings_id.eq(id)) - .select(SwitchPortAddressConfig::as_select()) - .load_async::(&conn) - .await?; + result.addresses = + address_config_dsl::switch_port_settings_address_config + .filter(address_config::port_settings_id.eq(id)) + .select(SwitchPortAddressConfig::as_select()) + .load_async::(&conn) + .await?; - Ok(result) + Ok(result) + } }) .await - .map_err(|e| match e { - TxnError::CustomError(SwitchPortSettingsGetError::NotFound( - name, - )) => Error::not_found_by_name( - ResourceType::SwitchPortSettings, - &name, - ), - TxnError::Database(e) => match e { - DieselError::DatabaseError(_, _) => { - let name = name_or_id.to_string(); - public_error_from_diesel( - e, - ErrorHandler::Conflict( + .map_err(|e| { + if let Some(err) = err.take() { + match err { + SwitchPortSettingsGetError::NotFound(name) => { + Error::not_found_by_name( ResourceType::SwitchPortSettings, &name, - ), - ) + ) + } } - _ => public_error_from_diesel(e, ErrorHandler::Server), - }, + } else { + let name = name_or_id.to_string(); + public_error_from_diesel( 
+ e, + ErrorHandler::Conflict( + ResourceType::SwitchPortSettings, + &name, + ), + ) + } }) } @@ -850,7 +859,8 @@ impl DataStore { enum SwitchPortCreateError { RackNotFound, } - type TxnError = TransactionError; + + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; let switch_port = SwitchPort::new( @@ -861,46 +871,59 @@ impl DataStore { // TODO https://github.com/oxidecomputer/omicron/issues/2811 // Audit external networking database transaction usage - conn.transaction_async(|conn| async move { - use db::schema::rack; - use db::schema::rack::dsl as rack_dsl; - rack_dsl::rack - .filter(rack::id.eq(rack_id)) - .select(rack::id) - .limit(1) - .first_async::(&conn) - .await - .map_err(|_| { - TxnError::CustomError(SwitchPortCreateError::RackNotFound) - })?; - - // insert switch port - use db::schema::switch_port::dsl as switch_port_dsl; - let db_switch_port: SwitchPort = - diesel::insert_into(switch_port_dsl::switch_port) - .values(switch_port) - .returning(SwitchPort::as_returning()) - .get_result_async(&conn) - .await?; + self.transaction_retry_wrapper("switch_port_create") + .transaction(&conn, |conn| { + let err = err.clone(); + let switch_port = switch_port.clone(); + async move { + use db::schema::rack; + use db::schema::rack::dsl as rack_dsl; + rack_dsl::rack + .filter(rack::id.eq(rack_id)) + .select(rack::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + err.bail_retryable_or( + e, + SwitchPortCreateError::RackNotFound, + ) + })?; - Ok(db_switch_port) - }) - .await - .map_err(|e| match e { - TxnError::CustomError(SwitchPortCreateError::RackNotFound) => { - Error::invalid_request("rack not found") - } - TxnError::Database(e) => match e { - DieselError::DatabaseError(_, _) => public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::SwitchPort, - &format!("{}/{}/{}", rack_id, &switch_location, &port,), - ), - ), - _ => public_error_from_diesel(e, ErrorHandler::Server), - }, - }) + // insert switch port + use db::schema::switch_port::dsl as switch_port_dsl; + let db_switch_port: SwitchPort = + diesel::insert_into(switch_port_dsl::switch_port) + .values(switch_port) + .returning(SwitchPort::as_returning()) + .get_result_async(&conn) + .await?; + + Ok(db_switch_port) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + SwitchPortCreateError::RackNotFound => { + Error::invalid_request("rack not found") + } + } + } else { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::SwitchPort, + &format!( + "{}/{}/{}", + rack_id, &switch_location, &port, + ), + ), + ) + } + }) } pub async fn switch_port_delete( @@ -914,58 +937,75 @@ impl DataStore { NotFound, ActiveSettings, } - type TxnError = TransactionError; + + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; // TODO https://github.com/oxidecomputer/omicron/issues/2811 // Audit external networking database transaction usage - conn.transaction_async(|conn| async move { - use db::schema::switch_port; - use db::schema::switch_port::dsl as switch_port_dsl; - - let switch_location = params.switch_location.to_string(); - let port_name = portname.to_string(); - let port: SwitchPort = switch_port_dsl::switch_port - .filter(switch_port::rack_id.eq(params.rack_id)) - .filter( - switch_port::switch_location.eq(switch_location.clone()), - ) - .filter(switch_port::port_name.eq(port_name.clone())) - .select(SwitchPort::as_select()) - .limit(1) - .first_async::(&conn) - .await - .map_err(|_| { 
- TxnError::CustomError(SwitchPortDeleteError::NotFound) - })?; - - if port.port_settings_id.is_some() { - return Err(TxnError::CustomError( - SwitchPortDeleteError::ActiveSettings, - )); - } + self.transaction_retry_wrapper("switch_port_delete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + use db::schema::switch_port; + use db::schema::switch_port::dsl as switch_port_dsl; + + let switch_location = params.switch_location.to_string(); + let port_name = portname.to_string(); + let port: SwitchPort = switch_port_dsl::switch_port + .filter(switch_port::rack_id.eq(params.rack_id)) + .filter( + switch_port::switch_location + .eq(switch_location.clone()), + ) + .filter(switch_port::port_name.eq(port_name.clone())) + .select(SwitchPort::as_select()) + .limit(1) + .first_async::(&conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortDeleteError::NotFound, + ) + })?; - diesel::delete(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(port.id)) - .execute_async(&conn) - .await?; + if port.port_settings_id.is_some() { + return Err( + err.bail(SwitchPortDeleteError::ActiveSettings) + ); + } - Ok(()) - }) - .await - .map_err(|e| match e { - TxnError::CustomError(SwitchPortDeleteError::NotFound) => { - let name = &portname.clone(); - Error::not_found_by_name(ResourceType::SwitchPort, name) - } - TxnError::CustomError(SwitchPortDeleteError::ActiveSettings) => { - Error::invalid_request("must clear port settings first") - } - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - }) + diesel::delete(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(port.id)) + .execute_async(&conn) + .await?; + + Ok(()) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + SwitchPortDeleteError::NotFound => { + let name = &portname.clone(); + Error::not_found_by_name( + ResourceType::SwitchPort, + name, + ) + } + SwitchPortDeleteError::ActiveSettings => { + Error::invalid_request( + "must clear port settings first", + ) + } + } + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn switch_port_list( diff --git a/nexus/db-queries/src/db/datastore/update.rs b/nexus/db-queries/src/db/datastore/update.rs index 8b1eecb781..0790bd458e 100644 --- a/nexus/db-queries/src/db/datastore/update.rs +++ b/nexus/db-queries/src/db/datastore/update.rs @@ -8,15 +8,13 @@ use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db; -use crate::db::error::{ - public_error_from_diesel, ErrorHandler, TransactionError, -}; +use crate::db::error::{public_error_from_diesel, ErrorHandler}; use crate::db::model::{ ComponentUpdate, SemverVersion, SystemUpdate, UpdateArtifact, UpdateDeployment, UpdateStatus, UpdateableComponent, }; use crate::db::pagination::paginated; -use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; +use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use nexus_db_model::SystemUpdateComponentUpdate; @@ -141,36 +139,40 @@ impl DataStore { let version_string = update.version.to_string(); - self.pool_connection_authorized(opctx) - .await? 
- .transaction_async(|conn| async move { - let db_update = diesel::insert_into(component_update::table) - .values(update.clone()) - .returning(ComponentUpdate::as_returning()) - .get_result_async(&conn) - .await?; - - diesel::insert_into(join_table::table) - .values(SystemUpdateComponentUpdate { - system_update_id, - component_update_id: update.id(), - }) - .returning(SystemUpdateComponentUpdate::as_returning()) - .get_result_async(&conn) - .await?; - - Ok(db_update) + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper("create_component_update") + .transaction(&conn, |conn| { + let update = update.clone(); + async move { + let db_update = + diesel::insert_into(component_update::table) + .values(update.clone()) + .returning(ComponentUpdate::as_returning()) + .get_result_async(&conn) + .await?; + + diesel::insert_into(join_table::table) + .values(SystemUpdateComponentUpdate { + system_update_id, + component_update_id: update.id(), + }) + .returning(SystemUpdateComponentUpdate::as_returning()) + .get_result_async(&conn) + .await?; + + Ok(db_update) + } }) .await - .map_err(|e| match e { - TransactionError::CustomError(e) => e, - TransactionError::Database(e) => public_error_from_diesel( + .map_err(|e| { + public_error_from_diesel( e, ErrorHandler::Conflict( ResourceType::ComponentUpdate, &version_string, ), - ), + ) }) } diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index c5c2751723..230c3941ff 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -15,6 +15,7 @@ use crate::db::pool::DbConnection; use crate::db::queries::virtual_provisioning_collection_update::VirtualProvisioningCollectionUpdate; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use diesel::result::Error as DieselError; use omicron_common::api::external::{DeleteResult, Error}; use uuid::Uuid; @@ -52,13 +53,14 @@ impl DataStore { virtual_provisioning_collection, ) .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } pub(crate) async fn virtual_provisioning_collection_create_on_connection( &self, conn: &async_bb8_diesel::Connection, virtual_provisioning_collection: VirtualProvisioningCollection, - ) -> Result, Error> { + ) -> Result, DieselError> { use db::schema::virtual_provisioning_collection::dsl; let provisions: Vec = @@ -66,12 +68,10 @@ impl DataStore { .values(virtual_provisioning_collection) .on_conflict_do_nothing() .get_results_async(conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - self.virtual_provisioning_collection_producer - .append_all_metrics(&provisions)?; + .await?; + let _ = self + .virtual_provisioning_collection_producer + .append_all_metrics(&provisions); Ok(provisions) } @@ -103,16 +103,20 @@ impl DataStore { id: Uuid, ) -> DeleteResult { let conn = self.pool_connection_authorized(opctx).await?; - self.virtual_provisioning_collection_delete_on_connection(&conn, id) - .await + self.virtual_provisioning_collection_delete_on_connection( + &opctx.log, &conn, id, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } /// Delete a [`VirtualProvisioningCollection`] object. 
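+    ///
+    /// This connection-level variant returns `DieselError` rather than a
+    /// public error so that it can run inside a caller's transaction;
+    /// deleting a collection that is still non-empty logs a warning and
+    /// rolls the transaction back.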
pub(crate) async fn virtual_provisioning_collection_delete_on_connection( &self, + log: &slog::Logger, conn: &async_bb8_diesel::Connection, id: Uuid, - ) -> DeleteResult { + ) -> Result<(), DieselError> { use db::schema::virtual_provisioning_collection::dsl; // NOTE: We don't really need to extract the value we're deleting from @@ -122,13 +126,11 @@ impl DataStore { .filter(dsl::id.eq(id)) .returning(VirtualProvisioningCollection::as_select()) .get_result_async(conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .await?; if !collection.is_empty() { - return Err(Error::internal_error(&format!( - "Collection deleted while non-empty: {collection:?}" - ))); + warn!(log, "Collection deleted while non-empty: {collection:?}"); + return Err(DieselError::RollbackTransaction); } Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index 5f126050ae..4f31efd610 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -8,15 +8,14 @@ use super::DataStore; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::identity::Asset; use crate::db::model::Dataset; use crate::db::model::Region; use crate::db::model::RegionSnapshot; use crate::db::model::Volume; use crate::db::queries::volume::DecreaseCrucibleResourceCountAndSoftDeleteVolume; +use crate::transaction_retry::OptionalError; use anyhow::bail; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use diesel::OptionalExtension; @@ -44,7 +43,6 @@ impl DataStore { #[error("Serde error during Volume creation: {0}")] SerdeError(#[from] serde_json::Error), } - type TxnError = TransactionError; // Grab all the targets that the volume construction request references. // Do this outside the transaction, as the data inside volume doesn't @@ -66,86 +64,91 @@ impl DataStore { crucible_targets }; - self.pool_connection_unauthorized() - .await? - .transaction_async(|conn| async move { - let maybe_volume: Option = dsl::volume - .filter(dsl::id.eq(volume.id())) - .select(Volume::as_select()) - .first_async(&conn) - .await - .optional() - .map_err(|e| { - TxnError::CustomError(VolumeCreationError::Public( - public_error_from_diesel(e, ErrorHandler::Server), - )) - })?; - - // If the volume existed already, return it and do not increase - // usage counts. - if let Some(volume) = maybe_volume { - return Ok(volume); - } - - // TODO do we need on_conflict do_nothing here? if the transaction - // model is read-committed, the SELECT above could return nothing, - // and the INSERT here could still result in a conflict. 
- // - // See also https://github.com/oxidecomputer/omicron/issues/1168 - let volume: Volume = diesel::insert_into(dsl::volume) - .values(volume.clone()) - .on_conflict(dsl::id) - .do_nothing() - .returning(Volume::as_returning()) - .get_result_async(&conn) - .await - .map_err(|e| { - TxnError::CustomError(VolumeCreationError::Public( - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::Volume, - volume.id().to_string().as_str(), - ), - ), - )) - })?; + let err = OptionalError::new(); + let conn = self.pool_connection_unauthorized().await?; + self.transaction_retry_wrapper("volume_create") + .transaction(&conn, |conn| { + let err = err.clone(); + let crucible_targets = crucible_targets.clone(); + let volume = volume.clone(); + async move { + let maybe_volume: Option = dsl::volume + .filter(dsl::id.eq(volume.id())) + .select(Volume::as_select()) + .first_async(&conn) + .await + .optional()?; - // Increase the usage count for Crucible resources according to the - // contents of the volume. + // If the volume existed already, return it and do not increase + // usage counts. + if let Some(volume) = maybe_volume { + return Ok(volume); + } - // Increase the number of uses for each referenced region snapshot. - use db::schema::region_snapshot::dsl as rs_dsl; - for read_only_target in &crucible_targets.read_only_targets { - diesel::update(rs_dsl::region_snapshot) - .filter( - rs_dsl::snapshot_addr.eq(read_only_target.clone()), - ) - .filter(rs_dsl::deleting.eq(false)) - .set( - rs_dsl::volume_references - .eq(rs_dsl::volume_references + 1), - ) - .execute_async(&conn) + // TODO do we need on_conflict do_nothing here? if the transaction + // model is read-committed, the SELECT above could return nothing, + // and the INSERT here could still result in a conflict. + // + // See also https://github.com/oxidecomputer/omicron/issues/1168 + let volume: Volume = diesel::insert_into(dsl::volume) + .values(volume.clone()) + .on_conflict(dsl::id) + .do_nothing() + .returning(Volume::as_returning()) + .get_result_async(&conn) .await .map_err(|e| { - TxnError::CustomError(VolumeCreationError::Public( - public_error_from_diesel( - e, - ErrorHandler::Server, - ), - )) + err.bail_retryable_or_else(e, |e| { + VolumeCreationError::Public( + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::Volume, + volume.id().to_string().as_str(), + ), + ), + ) + }) })?; - } - Ok(volume) + // Increase the usage count for Crucible resources according to the + // contents of the volume. + + // Increase the number of uses for each referenced region snapshot. 
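+                    // (`volume_references` counts how many live volumes
+                    // point at a given read-only region snapshot; the
+                    // matching decrement-and-cleanup path lives in
+                    // `DecreaseCrucibleResourceCountAndSoftDeleteVolume`.)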
+ use db::schema::region_snapshot::dsl as rs_dsl; + for read_only_target in &crucible_targets.read_only_targets + { + diesel::update(rs_dsl::region_snapshot) + .filter( + rs_dsl::snapshot_addr + .eq(read_only_target.clone()), + ) + .filter(rs_dsl::deleting.eq(false)) + .set( + rs_dsl::volume_references + .eq(rs_dsl::volume_references + 1), + ) + .execute_async(&conn) + .await?; + } + + Ok(volume) + } }) .await - .map_err(|e| match e { - TxnError::CustomError(VolumeCreationError::Public(e)) => e, - - _ => { - Error::internal_error(&format!("Transaction error: {}", e)) + .map_err(|e| { + if let Some(err) = err.take() { + match err { + VolumeCreationError::Public(err) => err, + VolumeCreationError::SerdeError(err) => { + Error::internal_error(&format!( + "Transaction error: {}", + err + )) + } + } + } else { + public_error_from_diesel(e, ErrorHandler::Server) } }) } @@ -192,16 +195,12 @@ impl DataStore { #[derive(Debug, thiserror::Error)] enum VolumeGetError { - #[error("Error during volume_checkout: {0}")] - DieselError(#[from] diesel::result::Error), - #[error("Serde error during volume_checkout: {0}")] SerdeError(#[from] serde_json::Error), #[error("Updated {0} database rows, expected {1}")] UnexpectedDatabaseUpdate(usize, usize), } - type TxnError = TransactionError; // We perform a transaction here, to be sure that on completion // of this, the database contains an updated version of the @@ -209,141 +208,141 @@ impl DataStore { // types that require it). The generation number (along with the // rest of the volume data) that was in the database is what is // returned to the caller. - self.pool_connection_unauthorized() - .await? - .transaction_async(|conn| async move { - // Grab the volume in question. - let volume = dsl::volume - .filter(dsl::id.eq(volume_id)) - .select(Volume::as_select()) - .get_result_async(&conn) - .await?; - - // Turn the volume.data into the VolumeConstructionRequest - let vcr: VolumeConstructionRequest = - serde_json::from_str(volume.data()).map_err(|e| { - TxnError::CustomError(VolumeGetError::SerdeError(e)) - })?; - - // Look to see if the VCR is a Volume type, and if so, look at - // its sub_volumes. If they are of type Region, then we need - // to update their generation numbers and record that update - // back to the database. We return to the caller whatever the - // original volume data was we pulled from the database. - match vcr { - VolumeConstructionRequest::Volume { - id, - block_size, - sub_volumes, - read_only_parent, - } => { - let mut update_needed = false; - let mut new_sv = Vec::new(); - for sv in sub_volumes { - match sv { - VolumeConstructionRequest::Region { - block_size, - blocks_per_extent, - extent_count, - opts, - gen, - } => { - update_needed = true; - new_sv.push( - VolumeConstructionRequest::Region { - block_size, - blocks_per_extent, - extent_count, - opts, - gen: gen + 1, - }, - ); - } - _ => { - new_sv.push(sv); + let err = OptionalError::new(); + let conn = self.pool_connection_unauthorized().await?; + + self.transaction_retry_wrapper("volume_checkout") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + // Grab the volume in question. 
+ let volume = dsl::volume + .filter(dsl::id.eq(volume_id)) + .select(Volume::as_select()) + .get_result_async(&conn) + .await?; + + // Turn the volume.data into the VolumeConstructionRequest + let vcr: VolumeConstructionRequest = + serde_json::from_str(volume.data()).map_err(|e| { + err.bail(VolumeGetError::SerdeError(e)) + })?; + + // Look to see if the VCR is a Volume type, and if so, look at + // its sub_volumes. If they are of type Region, then we need + // to update their generation numbers and record that update + // back to the database. We return to the caller whatever the + // original volume data was we pulled from the database. + match vcr { + VolumeConstructionRequest::Volume { + id, + block_size, + sub_volumes, + read_only_parent, + } => { + let mut update_needed = false; + let mut new_sv = Vec::new(); + for sv in sub_volumes { + match sv { + VolumeConstructionRequest::Region { + block_size, + blocks_per_extent, + extent_count, + opts, + gen, + } => { + update_needed = true; + new_sv.push( + VolumeConstructionRequest::Region { + block_size, + blocks_per_extent, + extent_count, + opts, + gen: gen + 1, + }, + ); + } + _ => { + new_sv.push(sv); + } } } - } - // Only update the volume data if we found the type - // of volume that needed it. - if update_needed { - // Create a new VCR and fill in the contents - // from what the original volume had, but with our - // updated sub_volume records. - let new_vcr = VolumeConstructionRequest::Volume { - id, - block_size, - sub_volumes: new_sv, - read_only_parent, - }; - - let new_volume_data = serde_json::to_string( - &new_vcr, - ) - .map_err(|e| { - TxnError::CustomError( - VolumeGetError::SerdeError(e), - ) - })?; + // Only update the volume data if we found the type + // of volume that needed it. + if update_needed { + // Create a new VCR and fill in the contents + // from what the original volume had, but with our + // updated sub_volume records. + let new_vcr = VolumeConstructionRequest::Volume { + id, + block_size, + sub_volumes: new_sv, + read_only_parent, + }; - // Update the original volume_id with the new - // volume.data. - use db::schema::volume::dsl as volume_dsl; - let num_updated = - diesel::update(volume_dsl::volume) - .filter(volume_dsl::id.eq(volume_id)) - .set(volume_dsl::data.eq(new_volume_data)) - .execute_async(&conn) - .await?; + let new_volume_data = serde_json::to_string( + &new_vcr, + ) + .map_err(|e| { + err.bail(VolumeGetError::SerdeError(e)) + })?; - // This should update just one row. If it does - // not, then something is terribly wrong in the - // database. - if num_updated != 1 { - return Err(TxnError::CustomError( - VolumeGetError::UnexpectedDatabaseUpdate( - num_updated, - 1, - ), - )); + // Update the original volume_id with the new + // volume.data. + use db::schema::volume::dsl as volume_dsl; + let num_updated = + diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(volume_id)) + .set(volume_dsl::data.eq(new_volume_data)) + .execute_async(&conn) + .await?; + + // This should update just one row. If it does + // not, then something is terribly wrong in the + // database. + if num_updated != 1 { + return Err(err.bail( + VolumeGetError::UnexpectedDatabaseUpdate( + num_updated, + 1, + ), + )); + } } } + VolumeConstructionRequest::Region { + block_size: _, + blocks_per_extent: _, + extent_count: _, + opts: _, + gen: _, + } => { + // We don't support a pure Region VCR at the volume + // level in the database, so this choice should + // never be encountered, but I want to know if it is. 
+ panic!("Region not supported as a top level volume"); + } + VolumeConstructionRequest::File { + id: _, + block_size: _, + path: _, + } + | VolumeConstructionRequest::Url { + id: _, + block_size: _, + url: _, + } => {} } - VolumeConstructionRequest::Region { - block_size: _, - blocks_per_extent: _, - extent_count: _, - opts: _, - gen: _, - } => { - // We don't support a pure Region VCR at the volume - // level in the database, so this choice should - // never be encountered, but I want to know if it is. - panic!("Region not supported as a top level volume"); - } - VolumeConstructionRequest::File { - id: _, - block_size: _, - path: _, - } - | VolumeConstructionRequest::Url { - id: _, - block_size: _, - url: _, - } => {} + Ok(volume) } - Ok(volume) }) .await - .map_err(|e| match e { - TxnError::CustomError(VolumeGetError::DieselError(e)) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - - _ => { - Error::internal_error(&format!("Transaction error: {}", e)) + .map_err(|e| { + if let Some(err) = err.take() { + return Error::internal_error(&format!("Transaction error: {}", err)); } + public_error_from_diesel(e, ErrorHandler::Server) }) } @@ -638,16 +637,12 @@ impl DataStore { ) -> Result { #[derive(Debug, thiserror::Error)] enum RemoveReadOnlyParentError { - #[error("Error removing read only parent: {0}")] - DieselError(#[from] diesel::result::Error), - #[error("Serde error removing read only parent: {0}")] SerdeError(#[from] serde_json::Error), #[error("Updated {0} database rows, expected {1}")] UnexpectedDatabaseUpdate(usize, usize), } - type TxnError = TransactionError; // In this single transaction: // - Get the given volume from the volume_id from the database @@ -663,170 +658,160 @@ impl DataStore { // data from original volume_id. // - Put the new temp VCR into the temp volume.data, update the // temp_volume in the database. - self.pool_connection_unauthorized() - .await? - .transaction_async(|conn| async move { - // Grab the volume in question. If the volume record was already - // deleted then we can just return. - let volume = { - use db::schema::volume::dsl; - - let volume = dsl::volume - .filter(dsl::id.eq(volume_id)) - .select(Volume::as_select()) - .get_result_async(&conn) - .await - .optional()?; - - let volume = if let Some(v) = volume { - v - } else { - // the volume does not exist, nothing to do. - return Ok(false); + let err = OptionalError::new(); + let conn = self.pool_connection_unauthorized().await?; + self.transaction_retry_wrapper("volume_remove_rop") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + // Grab the volume in question. If the volume record was already + // deleted then we can just return. + let volume = { + use db::schema::volume::dsl; + + let volume = dsl::volume + .filter(dsl::id.eq(volume_id)) + .select(Volume::as_select()) + .get_result_async(&conn) + .await + .optional()?; + + let volume = if let Some(v) = volume { + v + } else { + // the volume does not exist, nothing to do. + return Ok(false); + }; + + if volume.time_deleted.is_some() { + // this volume is deleted, so let whatever is deleting + // it clean it up. + return Ok(false); + } else { + // A volume record exists, and was not deleted, we + // can attempt to remove its read_only_parent. + volume + } }; - if volume.time_deleted.is_some() { - // this volume is deleted, so let whatever is deleting - // it clean it up. - return Ok(false); - } else { - // A volume record exists, and was not deleted, we - // can attempt to remove its read_only_parent. 
- volume - } - }; - - // If a read_only_parent exists, remove it from volume_id, and - // attach it to temp_volume_id. - let vcr: VolumeConstructionRequest = - serde_json::from_str( - volume.data() - ) - .map_err(|e| { - TxnError::CustomError( - RemoveReadOnlyParentError::SerdeError( - e, - ), + // If a read_only_parent exists, remove it from volume_id, and + // attach it to temp_volume_id. + let vcr: VolumeConstructionRequest = + serde_json::from_str( + volume.data() ) - })?; - - match vcr { - VolumeConstructionRequest::Volume { - id, - block_size, - sub_volumes, - read_only_parent, - } => { - if read_only_parent.is_none() { - // This volume has no read_only_parent - Ok(false) - } else { - // Create a new VCR and fill in the contents - // from what the original volume had. - let new_vcr = VolumeConstructionRequest::Volume { - id, - block_size, - sub_volumes, - read_only_parent: None, - }; - - let new_volume_data = - serde_json::to_string( - &new_vcr + .map_err(|e| { + err.bail( + RemoveReadOnlyParentError::SerdeError( + e, ) - .map_err(|e| { - TxnError::CustomError( - RemoveReadOnlyParentError::SerdeError( - e, - ), - ) - })?; + ) + })?; - // Update the original volume_id with the new - // volume.data. - use db::schema::volume::dsl as volume_dsl; - let num_updated = diesel::update(volume_dsl::volume) - .filter(volume_dsl::id.eq(volume_id)) - .set(volume_dsl::data.eq(new_volume_data)) - .execute_async(&conn) - .await?; - - // This should update just one row. If it does - // not, then something is terribly wrong in the - // database. - if num_updated != 1 { - return Err(TxnError::CustomError( - RemoveReadOnlyParentError::UnexpectedDatabaseUpdate(num_updated, 1), - )); - } + match vcr { + VolumeConstructionRequest::Volume { + id, + block_size, + sub_volumes, + read_only_parent, + } => { + if read_only_parent.is_none() { + // This volume has no read_only_parent + Ok(false) + } else { + // Create a new VCR and fill in the contents + // from what the original volume had. + let new_vcr = VolumeConstructionRequest::Volume { + id, + block_size, + sub_volumes, + read_only_parent: None, + }; - // Make a new VCR, with the information from - // our temp_volume_id, but the read_only_parent - // from the original volume. - let rop_vcr = VolumeConstructionRequest::Volume { - id: temp_volume_id, - block_size, - sub_volumes: vec![], - read_only_parent, - }; - let rop_volume_data = - serde_json::to_string( - &rop_vcr - ) - .map_err(|e| { - TxnError::CustomError( - RemoveReadOnlyParentError::SerdeError( - e, - ), + let new_volume_data = + serde_json::to_string( + &new_vcr ) - })?; - // Update the temp_volume_id with the volume - // data that contains the read_only_parent. - let num_updated = - diesel::update(volume_dsl::volume) - .filter(volume_dsl::id.eq(temp_volume_id)) - .filter(volume_dsl::time_deleted.is_null()) - .set(volume_dsl::data.eq(rop_volume_data)) + .map_err(|e| { + err.bail(RemoveReadOnlyParentError::SerdeError( + e, + )) + })?; + + // Update the original volume_id with the new + // volume.data. + use db::schema::volume::dsl as volume_dsl; + let num_updated = diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(volume_id)) + .set(volume_dsl::data.eq(new_volume_data)) .execute_async(&conn) .await?; - if num_updated != 1 { - return Err(TxnError::CustomError( - RemoveReadOnlyParentError::UnexpectedDatabaseUpdate(num_updated, 1), - )); + + // This should update just one row. If it does + // not, then something is terribly wrong in the + // database. 
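+                            // Bailing here, rather than returning a plain
+                            // Diesel error, is deliberate: this condition
+                            // will not heal on retry, so the transaction
+                            // rolls back and the caller maps it to an
+                            // internal error below.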
+ if num_updated != 1 { + return Err(err.bail(RemoveReadOnlyParentError::UnexpectedDatabaseUpdate(num_updated, 1))); + } + + // Make a new VCR, with the information from + // our temp_volume_id, but the read_only_parent + // from the original volume. + let rop_vcr = VolumeConstructionRequest::Volume { + id: temp_volume_id, + block_size, + sub_volumes: vec![], + read_only_parent, + }; + let rop_volume_data = + serde_json::to_string( + &rop_vcr + ) + .map_err(|e| { + err.bail(RemoveReadOnlyParentError::SerdeError( + e, + )) + })?; + // Update the temp_volume_id with the volume + // data that contains the read_only_parent. + let num_updated = + diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(temp_volume_id)) + .filter(volume_dsl::time_deleted.is_null()) + .set(volume_dsl::data.eq(rop_volume_data)) + .execute_async(&conn) + .await?; + if num_updated != 1 { + return Err(err.bail(RemoveReadOnlyParentError::UnexpectedDatabaseUpdate(num_updated, 1))); + } + Ok(true) } - Ok(true) } - } - VolumeConstructionRequest::File { id: _, block_size: _, path: _ } - | VolumeConstructionRequest::Region { - block_size: _, - blocks_per_extent: _, - extent_count: _, - opts: _, - gen: _ } - | VolumeConstructionRequest::Url { id: _, block_size: _, url: _ } => { - // Volume has a format that does not contain ROPs - Ok(false) + VolumeConstructionRequest::File { id: _, block_size: _, path: _ } + | VolumeConstructionRequest::Region { + block_size: _, + blocks_per_extent: _, + extent_count: _, + opts: _, + gen: _ } + | VolumeConstructionRequest::Url { id: _, block_size: _, url: _ } => { + // Volume has a format that does not contain ROPs + Ok(false) + } } } }) .await - .map_err(|e| match e { - TxnError::CustomError( - RemoveReadOnlyParentError::DieselError(e), - ) => public_error_from_diesel( - e, - ErrorHandler::Server, - ), - - _ => { - Error::internal_error(&format!("Transaction error: {}", e)) + .map_err(|e| { + if let Some(err) = err.take() { + return Error::internal_error(&format!("Transaction error: {}", err)); } + public_error_from_diesel(e, ErrorHandler::Server) }) } } -#[derive(Default, Debug, Serialize, Deserialize)] +#[derive(Default, Clone, Debug, Serialize, Deserialize)] pub struct CrucibleTargets { pub read_only_targets: Vec, } diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 6db99465a3..069ce63028 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -12,7 +12,6 @@ use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::fixed_data::vpc::SERVICES_VPC_ID; use crate::db::identity::Resource; use crate::db::model::IncompleteVpc; @@ -37,7 +36,7 @@ use crate::db::queries::vpc::InsertVpcQuery; use crate::db::queries::vpc::VniSearchIter; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use crate::db::queries::vpc_subnet::SubnetError; -use async_bb8_diesel::AsyncConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -580,53 +579,65 @@ impl DataStore { .set(dsl::time_deleted.eq(now)); let rules_is_empty = rules.is_empty(); - let insert_new_query = Vpc::insert_resource( - authz_vpc.id(), - diesel::insert_into(dsl::vpc_firewall_rule).values(rules), - ); - #[derive(Debug)] enum FirewallUpdateError { 
CollectionNotFound, } - type TxnError = TransactionError; + + let err = OptionalError::new(); // TODO-scalability: Ideally this would be a CTE so we don't need to // hold a transaction open across multiple roundtrips from the database, // but for now we're using a transaction due to the severely decreased // legibility of CTEs via diesel right now. - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { - delete_old_query.execute_async(&conn).await?; - - // The generation count update on the vpc table row will take a - // write lock on the row, ensuring that the vpc was not deleted - // concurently. - if rules_is_empty { - return Ok(vec![]); - } - insert_new_query + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper("vpc_update_firewall_rules") + .transaction(&conn, |conn| { + let err = err.clone(); + let delete_old_query = delete_old_query.clone(); + let rules = rules.clone(); + async move { + delete_old_query.execute_async(&conn).await?; + + // The generation count update on the vpc table row will take a + // write lock on the row, ensuring that the vpc was not deleted + // concurently. + if rules_is_empty { + return Ok(vec![]); + } + Vpc::insert_resource( + authz_vpc.id(), + diesel::insert_into(dsl::vpc_firewall_rule) + .values(rules), + ) .insert_and_get_results_async(&conn) .await .map_err(|e| match e { AsyncInsertError::CollectionNotFound => { - TxnError::CustomError( - FirewallUpdateError::CollectionNotFound, - ) + err.bail(FirewallUpdateError::CollectionNotFound) } - AsyncInsertError::DatabaseError(e) => e.into(), + AsyncInsertError::DatabaseError(e) => e, }) + } }) .await - .map_err(|e| match e { - TxnError::CustomError( - FirewallUpdateError::CollectionNotFound, - ) => Error::not_found_by_id(ResourceType::Vpc, &authz_vpc.id()), - TxnError::Database(e) => public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_vpc), - ), + .map_err(|e| { + if let Some(err) = err.take() { + match err { + FirewallUpdateError::CollectionNotFound => { + Error::not_found_by_id( + ResourceType::Vpc, + &authz_vpc.id(), + ) + } + } + } else { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_vpc), + ) + } }) } diff --git a/nexus/db-queries/src/db/error.rs b/nexus/db-queries/src/db/error.rs index cbe2b0a71f..fc7f30da93 100644 --- a/nexus/db-queries/src/db/error.rs +++ b/nexus/db-queries/src/db/error.rs @@ -17,7 +17,7 @@ pub enum TransactionError { /// The customizable error type. /// /// This error should be used for all non-Diesel transaction failures. - #[error("Custom transaction error; {0}")] + #[error("Custom transaction error: {0}")] CustomError(T), /// The Diesel error type. @@ -28,31 +28,61 @@ pub enum TransactionError { Database(#[from] DieselError), } +pub fn retryable(error: &DieselError) -> bool { + match error { + DieselError::DatabaseError(kind, boxed_error_information) => match kind + { + DieselErrorKind::SerializationFailure => { + return boxed_error_information + .message() + .starts_with("restart transaction"); + } + _ => false, + }, + _ => false, + } +} + +/// Identifies if the error is retryable or not. +pub enum MaybeRetryable { + /// The error isn't retryable. + NotRetryable(T), + /// The error is retryable. + Retryable(DieselError), +} + +impl TransactionError { + /// Identifies that the error could be returned from a Diesel transaction. 
+ /// + /// Allows callers to propagate arbitrary errors out of transaction contexts + /// without losing information that might be valuable to the calling context, + /// such as "does this particular error indicate that the entire transaction + /// should retry?". + pub fn retryable(self) -> MaybeRetryable<Self> { + use MaybeRetryable::*; + + match self { + TransactionError::Database(err) if retryable(&err) => { + Retryable(err) + } + _ => NotRetryable(self), + } + } +} + impl From<PublicError> for TransactionError<PublicError> { fn from(err: PublicError) -> Self { TransactionError::CustomError(err) } } -impl<T> TransactionError<T> { - /// Based on [the CRDB][1] docs, return true if this transaction must be - /// retried. - /// - /// [1]: https://www.cockroachlabs.com/docs/v23.1/transaction-retry-error-reference#client-side-retry-handling - pub fn retry_transaction(&self) -> bool { - match &self { - Self::Database(DieselError::DatabaseError( - kind, - boxed_error_information, - )) => match kind { - DieselErrorKind::SerializationFailure => { - return boxed_error_information - .message() - .starts_with("restart transaction"); - } - _ => false, - }, - _ => false, +impl From<TransactionError<PublicError>> for PublicError { + fn from(err: TransactionError<PublicError>) -> Self { + match err { + TransactionError::CustomError(err) => err, + TransactionError::Database(err) => { + public_error_from_diesel(err, ErrorHandler::Server) + } } } } diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index b7c7079b54..e6b8743e94 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -17,7 +17,7 @@ mod config; mod cte_utils; // This is marked public for use by the integration tests pub mod datastore; -mod error; +pub(crate) mod error; mod explain; pub mod fixed_data; pub mod lookup; @@ -42,7 +42,7 @@ pub use nexus_db_model::schema; pub use crate::db::error::TransactionError; pub use config::Config; pub use datastore::DataStore; -pub use pool::Pool; +pub use pool::{DbConnection, Pool}; pub use saga_recovery::{recover, CompletionTask, RecoveryTask}; pub use saga_types::SecId; pub use sec_store::CockroachDbSecStore; diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 84a81a7b7a..1dbe57da6f 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -5,6 +5,7 @@ //! Queries for inserting and deleting network interfaces. use crate::db; +use crate::db::error::{public_error_from_diesel, retryable, ErrorHandler}; use crate::db::model::IncompleteNetworkInterface; use crate::db::pool::DbConnection; use crate::db::queries::next_item::DefaultShiftGenerator; @@ -120,6 +121,8 @@ pub enum InsertError { InstanceMustBeStopped(Uuid), /// The instance does not exist at all, or is in the destroyed state.
InstanceNotFound(Uuid), + /// The operation occurred within a transaction, and is retryable + Retryable(DieselError), /// Any other error External(external::Error), } @@ -135,7 +138,6 @@ impl InsertError { e: DieselError, interface: &IncompleteNetworkInterface, ) -> Self { - use crate::db::error; match e { // Catch the specific errors designed to communicate the failures we // want to distinguish @@ -143,9 +145,9 @@ impl InsertError { decode_database_error(e, interface) } // Any other error at all is a bug - _ => InsertError::External(error::public_error_from_diesel( + _ => InsertError::External(public_error_from_diesel( e, - error::ErrorHandler::Server, + ErrorHandler::Server, )), } } @@ -209,6 +211,9 @@ impl InsertError { InsertError::InstanceNotFound(id) => { external::Error::not_found_by_id(external::ResourceType::Instance, &id) } + InsertError::Retryable(err) => { + public_error_from_diesel(err, ErrorHandler::Server) + } InsertError::External(e) => e, } } @@ -290,6 +295,10 @@ fn decode_database_error( r#"uuid: incorrect UUID length: non-unique-subnets"#, ); + if retryable(&err) { + return InsertError::Retryable(err); + } + match err { // If the address allocation subquery fails, we'll attempt to insert // NULL for the `ip` column. This checks that the non-NULL constraint on diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index a693f7ff42..5d1927ebc7 100644 --- a/nexus/db-queries/src/lib.rs +++ b/nexus/db-queries/src/lib.rs @@ -9,6 +9,7 @@ pub mod authz; pub mod context; pub mod db; pub mod provisioning; +pub mod transaction_retry; #[macro_use] extern crate slog; diff --git a/nexus/db-queries/src/transaction_retry.rs b/nexus/db-queries/src/transaction_retry.rs new file mode 100644 index 0000000000..c474b729f8 --- /dev/null +++ b/nexus/db-queries/src/transaction_retry.rs @@ -0,0 +1,341 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Helper types for performing automatic transaction retries + +use async_bb8_diesel::AsyncConnection; +use chrono::Utc; +use diesel::result::Error as DieselError; +use oximeter::{types::Sample, Metric, MetricsError, Target}; +use rand::{thread_rng, Rng}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +// Identifies "which" transaction is retrying +#[derive(Debug, Clone, Target)] +struct DatabaseTransaction { + name: String, +} + +// Identifies that a retry has occurred, and tracks how long +// the transaction took (either since starting, or since the last +// retry failure was recorded).
+#[derive(Debug, Clone, Metric)] +struct RetryData { + #[datum] + latency: f64, + attempt: u32, +} + +// Collects all transaction retry samples +#[derive(Debug, Default, Clone)] +pub(crate) struct Producer { + samples: Arc<Mutex<Vec<Sample>>>, +} + +impl Producer { + pub(crate) fn new() -> Self { + Self { samples: Arc::new(Mutex::new(vec![])) } + } + + fn append( + &self, + transaction: &DatabaseTransaction, + data: &RetryData, + ) -> Result<(), MetricsError> { + let sample = Sample::new_with_timestamp(Utc::now(), transaction, data)?; + self.samples.lock().unwrap().push(sample); + Ok(()) + } +} + +struct RetryHelperInner { + start: chrono::DateTime<Utc>, + attempts: u32, +} + +impl RetryHelperInner { + fn new() -> Self { + Self { start: Utc::now(), attempts: 1 } + } + + fn tick(&mut self) -> Self { + let start = self.start; + let attempts = self.attempts; + + self.start = Utc::now(); + self.attempts += 1; + + Self { start, attempts } + } +} + +/// Helper utility for tracking retry attempts and latency. +/// Intended to be used from within "transaction_async_with_retry". +pub struct RetryHelper { + producer: Producer, + name: &'static str, + inner: Mutex<RetryHelperInner>, +} + +const MIN_RETRY_BACKOFF: Duration = Duration::from_millis(0); +const MAX_RETRY_BACKOFF: Duration = Duration::from_millis(50); +const MAX_RETRY_ATTEMPTS: u32 = 10; + +impl RetryHelper { + /// Creates a new RetryHelper, and starts a timer tracking the transaction + /// duration. + pub(crate) fn new(producer: &Producer, name: &'static str) -> Self { + Self { + producer: producer.clone(), + name, + inner: Mutex::new(RetryHelperInner::new()), + } + } + + /// Calls the function "f" in an asynchronous, retryable transaction. + pub async fn transaction<R, Func, Fut>( + self, + conn: &async_bb8_diesel::Connection<crate::db::DbConnection>, + f: Func, + ) -> Result<R, DieselError> + where + R: Send + 'static, + Fut: std::future::Future<Output = Result<R, DieselError>> + Send, + Func: Fn(async_bb8_diesel::Connection<crate::db::DbConnection>) -> Fut + + Send + + Sync, + { + conn.transaction_async_with_retry(f, self.as_callback()).await + } + + // Called upon retryable transaction failure. + // + // This function: + // - Appends a metric identifying the duration of the transaction operation + // - Performs a random (uniform) backoff (limited to less than 50 ms) + // - Returns "true" if the transaction should be restarted + async fn retry_callback(&self) -> bool { + // Look at the current attempt and start time so we can log this + // information before we start sleeping. + let (start, attempt) = { + let inner = self.inner.lock().unwrap(); + (inner.start, inner.attempts) + }; + + let latency = (Utc::now() - start) + .to_std() + .unwrap_or(Duration::ZERO) + .as_secs_f64(); + + let _ = self.producer.append( + &DatabaseTransaction { name: self.name.into() }, + &RetryData { latency, attempt }, + ); + + // This backoff is not exponential, but I'm not sure we actually want + // that much backoff here. If we're repeatedly failing, it would + // probably be better to fail the operation, at which point Oximeter + // will keep track of the failing transaction and identify that it's a + // high-priority target for CTE conversion. + let duration = { + let mut rng = thread_rng(); + rng.gen_range(MIN_RETRY_BACKOFF..MAX_RETRY_BACKOFF) + }; + tokio::time::sleep(duration).await; + + // Now that we've finished sleeping, reset the timer and bump the number + // of attempts we've tried. + let inner = self.inner.lock().unwrap().tick(); + return inner.attempts < MAX_RETRY_ATTEMPTS; + } + + /// Converts this function to a retryable callback that can be used from + /// "transaction_async_with_retry".
+ pub(crate) fn as_callback( + self, + ) -> impl Fn() -> futures::future::BoxFuture<'static, bool> { + let r = Arc::new(self); + move || { + let r = r.clone(); + Box::pin(async move { r.retry_callback().await }) + } + } +} + +impl oximeter::Producer for Producer { + fn produce( + &mut self, + ) -> Result<Box<dyn Iterator<Item = Sample> + 'static>, MetricsError> { + let samples = std::mem::take(&mut *self.samples.lock().unwrap()); + Ok(Box::new(samples.into_iter())) + } +} + +/// Helper utility for passing non-retryable errors out-of-band from +/// transactions. +/// +/// Transactions prefer to act on the `diesel::result::Error` type, +/// but transaction users may want more meaningful error types. +/// This utility helps callers safely propagate back Diesel errors while +/// retaining auxiliary error info. +pub struct OptionalError<E>(Arc<Mutex<Option<E>>>); + +impl<E> Clone for OptionalError<E> { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl<E> OptionalError<E> { + pub fn new() -> Self { + Self(Arc::new(Mutex::new(None))) + } + + /// Sets "Self" to the value of `error` and returns `DieselError::RollbackTransaction`. + pub fn bail(&self, err: E) -> DieselError { + (*self.0.lock().unwrap()).replace(err); + DieselError::RollbackTransaction + } + + /// If `diesel_error` is retryable, returns it without setting Self. + /// + /// Otherwise, sets "Self" to the value of `err`, and returns + /// `DieselError::RollbackTransaction`. + pub fn bail_retryable_or( + &self, + diesel_error: DieselError, + err: E, + ) -> DieselError { + self.bail_retryable_or_else(diesel_error, |_diesel_error| err) + } + + /// If `diesel_error` is retryable, returns it without setting Self. + /// + /// Otherwise, sets "Self" to the value of `f` applied to `diesel_err`, and + /// returns `DieselError::RollbackTransaction`. + pub fn bail_retryable_or_else<F>( + &self, + diesel_error: DieselError, + f: F, + ) -> DieselError + where + F: FnOnce(DieselError) -> E, + { + if crate::db::error::retryable(&diesel_error) { + return diesel_error; + } else { + self.bail(f(diesel_error)) + } + } + + /// If "Self" was previously set to a non-retryable error, return it. + pub fn take(self) -> Option<E> { + (*self.0.lock().unwrap()).take() + } +} + +#[cfg(test)] +mod test { + use super::*; + + use crate::db::datastore::datastore_test; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + use oximeter::types::FieldValue; + + // If a transaction is explicitly rolled back, we should not expect any + // samples to be taken. With no retries, this is just a normal operation + // failure. + #[tokio::test] + async fn test_transaction_rollback_produces_no_samples() { + let logctx = dev::test_setup_log( + "test_transaction_rollback_produces_no_samples", + ); + let mut db = test_setup_database(&logctx.log).await; + let (_opctx, datastore) = datastore_test(&logctx, &db).await; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + datastore + .transaction_retry_wrapper( + "test_transaction_rollback_produces_no_samples", + ) + .transaction(&conn, |_conn| async move { + Err::<(), _>(diesel::result::Error::RollbackTransaction) + }) + .await + .expect_err("Should have failed"); + + let samples = datastore + .transaction_retry_producer() + .samples + .lock() + .unwrap() + .clone(); + assert_eq!(samples, vec![]); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + // If a transaction fails with a retryable error, we record samples, + // providing oximeter-level information about the attempts.
+ #[tokio::test] + async fn test_transaction_retry_produces_samples() { + let logctx = + dev::test_setup_log("test_transaction_retry_produces_samples"); + let mut db = test_setup_database(&logctx.log).await; + let (_opctx, datastore) = datastore_test(&logctx, &db).await; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore + .transaction_retry_wrapper( + "test_transaction_retry_produces_samples", + ) + .transaction(&conn, |_conn| async move { + Err::<(), _>(diesel::result::Error::DatabaseError( + diesel::result::DatabaseErrorKind::SerializationFailure, + Box::new("restart transaction: Retry forever!".to_string()), + )) + }) + .await + .expect_err("Should have failed"); + + let samples = datastore + .transaction_retry_producer() + .samples + .lock() + .unwrap() + .clone(); + assert_eq!(samples.len(), MAX_RETRY_ATTEMPTS as usize); + + for i in 0..samples.len() { + let sample = &samples[i]; + + assert_eq!( + sample.timeseries_name, + "database_transaction:retry_data" + ); + + let target_fields = sample.sorted_target_fields(); + assert_eq!( + target_fields["name"].value, + FieldValue::String( + "test_transaction_retry_produces_samples".to_string() + ) + ); + + // Attempts are one-indexed + let metric_fields = sample.sorted_metric_fields(); + assert_eq!( + metric_fields["attempt"].value, + FieldValue::U32(u32::try_from(i).unwrap() + 1), + ); + } + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/src/app/background/dns_config.rs b/nexus/src/app/background/dns_config.rs index 654e9c0bf1..805ae813fe 100644 --- a/nexus/src/app/background/dns_config.rs +++ b/nexus/src/app/background/dns_config.rs @@ -166,7 +166,6 @@ mod test { use crate::app::background::init::test::read_internal_dns_zone_id; use crate::app::background::init::test::write_test_dns_generation; use assert_matches::assert_matches; - use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; use diesel::ExpressionMethods; @@ -237,11 +236,11 @@ mod test { ); // Similarly, wipe all of the state and verify that we handle that okay. 
+ let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore - .pool_connection_for_tests() - .await - .unwrap() - .transaction_async(|conn| async move { + .transaction_retry_wrapper("dns_config_test_basic") + .transaction(&conn, |conn| async move { conn.batch_execute_async( nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, ) .await .unwrap(); @@ -265,7 +264,7 @@ mod test { .execute_async(&conn) .await .unwrap(); - Ok::<_, nexus_db_queries::db::TransactionError<()>>(()) + Ok(()) }) .await .unwrap(); diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index cfa023a013..d30d2162c4 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -247,14 +247,12 @@ fn init_dns( #[cfg(test)] pub mod test { - use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use dropshot::HandlerTaskMode; use nexus_db_model::DnsGroup; use nexus_db_model::Generation; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; - use nexus_db_queries::db::TransactionError; use nexus_test_utils_macros::nexus_test; use nexus_types::internal_api::params as nexus_params; use nexus_types::internal_api::params::ServiceKind; @@ -446,11 +444,11 @@ pub mod test { datastore: &DataStore, internal_dns_zone_id: Uuid, ) { - type TxnError = TransactionError<()>; { let conn = datastore.pool_connection_for_tests().await.unwrap(); - let _: Result<(), TxnError> = conn - .transaction_async(|conn| async move { + let _: Result<(), _> = datastore + .transaction_retry_wrapper("write_test_dns_generation") + .transaction(&conn, |conn| async move { { use nexus_db_queries::db::model::DnsVersion; use nexus_db_queries::db::schema::dns_version::dsl; diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index fe403a7d41..4883afaddc 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -830,9 +830,7 @@ pub(crate) mod test { app::saga::create_saga_dag, app::sagas::disk_create::Params, app::sagas::disk_create::SagaDiskCreate, external_api::params, }; - use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use diesel::{ ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, }; @@ -972,27 +970,25 @@ pub(crate) mod test { use nexus_db_queries::db::model::VirtualProvisioningCollection; use nexus_db_queries::db::schema::virtual_provisioning_collection::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore - .pool_connection_for_tests() - .await - .unwrap() - .transaction_async(|conn| async move { + .transaction_retry_wrapper( + "no_virtual_provisioning_collection_records_using_storage", + ) + .transaction(&conn, |conn| async move { conn.batch_execute_async( nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, ) .await .unwrap(); - Ok::<_, nexus_db_queries::db::TransactionError<()>>( - dsl::virtual_provisioning_collection - .filter(dsl::virtual_disk_bytes_provisioned.ne(0)) - .select(VirtualProvisioningCollection::as_select()) - .get_results_async::<VirtualProvisioningCollection>( - &conn, - ) - .await - .unwrap() - .is_empty(), - ) + Ok(dsl::virtual_provisioning_collection + .filter(dsl::virtual_disk_bytes_provisioned.ne(0)) + .select(VirtualProvisioningCollection::as_select()) + .get_results_async::<VirtualProvisioningCollection>(&conn) + .await + .unwrap() + .is_empty()) }) .await .unwrap() diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 153e0323e7..8c2f96c36c
100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -866,9 +866,7 @@ pub mod test { app::sagas::instance_create::SagaInstanceCreate, app::sagas::test_helpers, external_api::params, }; - use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use diesel::{ BoolExpressionMethods, ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, @@ -1013,30 +1011,28 @@ pub mod test { use nexus_db_queries::db::model::SledResource; use nexus_db_queries::db::schema::sled_resource::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore - .pool_connection_for_tests() - .await - .unwrap() - .transaction_async(|conn| async move { + .transaction_retry_wrapper( + "no_sled_resource_instance_records_exist", + ) + .transaction(&conn, |conn| async move { conn.batch_execute_async( nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, ) .await .unwrap(); - Ok::<_, nexus_db_queries::db::TransactionError<()>>( - dsl::sled_resource - .filter( - dsl::kind.eq( - nexus_db_queries::db::model::SledResourceKind::Instance, - ), - ) - .select(SledResource::as_select()) - .get_results_async::<SledResource>(&conn) - .await - .unwrap() - .is_empty(), - ) + Ok(dsl::sled_resource + .filter(dsl::kind.eq( + nexus_db_queries::db::model::SledResourceKind::Instance, + )) + .select(SledResource::as_select()) + .get_results_async::<SledResource>(&conn) + .await + .unwrap() + .is_empty()) }) .await .unwrap() @@ -1048,16 +1044,17 @@ pub mod test { use nexus_db_queries::db::model::VirtualProvisioningResource; use nexus_db_queries::db::schema::virtual_provisioning_resource::dsl; - datastore.pool_connection_for_tests() - .await - .unwrap() - .transaction_async(|conn| async move { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + datastore + .transaction_retry_wrapper("no_virtual_provisioning_resource_records_exist") + .transaction(&conn, |conn| async move { conn .batch_execute_async(nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL) .await .unwrap(); - Ok::<_, nexus_db_queries::db::TransactionError<()>>( + Ok( dsl::virtual_provisioning_resource .filter(dsl::resource_type.eq(nexus_db_queries::db::model::ResourceTypeProvisioned::Instance.to_string())) .select(VirtualProvisioningResource::as_select()) @@ -1075,31 +1072,29 @@ pub mod test { use nexus_db_queries::db::model::VirtualProvisioningCollection; use nexus_db_queries::db::schema::virtual_provisioning_collection::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore - .pool_connection_for_tests() - .await - .unwrap() - .transaction_async(|conn| async move { + .transaction_retry_wrapper( + "no_virtual_provisioning_collection_records_using_instances", + ) + .transaction(&conn, |conn| async move { conn.batch_execute_async( nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, ) .await .unwrap(); - Ok::<_, nexus_db_queries::db::TransactionError<()>>( - dsl::virtual_provisioning_collection - .filter( - dsl::cpus_provisioned - .ne(0) - .or(dsl::ram_provisioned.ne(0)), - ) - .select(VirtualProvisioningCollection::as_select()) - .get_results_async::<VirtualProvisioningCollection>( - &conn, - ) - .await - .unwrap() - .is_empty(), - ) + Ok(dsl::virtual_provisioning_collection + .filter( + dsl::cpus_provisioned + .ne(0) + .or(dsl::ram_provisioned.ne(0)), + ) + .select(VirtualProvisioningCollection::as_select()) + .get_results_async::<VirtualProvisioningCollection>(&conn) + .await + .unwrap() + .is_empty()) }) .await .unwrap() diff --git
a/nexus/src/app/sagas/project_create.rs b/nexus/src/app/sagas/project_create.rs index 135e20ff06..40acc822c0 100644 --- a/nexus/src/app/sagas/project_create.rs +++ b/nexus/src/app/sagas/project_create.rs @@ -157,9 +157,7 @@ mod test { app::saga::create_saga_dag, app::sagas::project_create::Params, app::sagas::project_create::SagaProjectCreate, external_api::params, }; - use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use diesel::{ ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, }; @@ -233,15 +231,16 @@ mod test { use nexus_db_queries::db::model::VirtualProvisioningCollection; use nexus_db_queries::db::schema::virtual_provisioning_collection::dsl; - datastore.pool_connection_for_tests() - .await - .unwrap() - .transaction_async(|conn| async move { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + datastore + .transaction_retry_wrapper("no_virtual_provisioning_collection_records_for_projects") + .transaction(&conn, |conn| async move { conn .batch_execute_async(nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL) .await .unwrap(); - Ok::<_, nexus_db_queries::db::TransactionError<()>>( + Ok( dsl::virtual_provisioning_collection .filter(dsl::collection_type.eq(nexus_db_queries::db::model::CollectionTypeProvisioned::Project.to_string())) // ignore built-in services project diff --git a/nexus/src/app/sagas/test_helpers.rs b/nexus/src/app/sagas/test_helpers.rs index eccb013b66..3110bd318a 100644 --- a/nexus/src/app/sagas/test_helpers.rs +++ b/nexus/src/app/sagas/test_helpers.rs @@ -10,9 +10,7 @@ use crate::{ app::{saga::create_saga_dag, test_interfaces::TestInterfaces as _}, Nexus, }; -use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, -}; +use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use futures::future::BoxFuture; use nexus_db_queries::{ @@ -434,11 +432,10 @@ pub(crate) async fn assert_no_failed_undo_steps( ) { use nexus_db_queries::db::model::saga_types::SagaNodeEvent; + let conn = datastore.pool_connection_for_tests().await.unwrap(); let saga_node_events: Vec<SagaNodeEvent> = datastore - .pool_connection_for_tests() - .await - .unwrap() - .transaction_async(|conn| async move { + .transaction_retry_wrapper("assert_no_failed_undo_steps") + .transaction(&conn, |conn| async move { use nexus_db_queries::db::schema::saga_node_event::dsl; conn.batch_execute_async( @@ -447,14 +444,12 @@ pub(crate) async fn assert_no_failed_undo_steps( .await .unwrap(); - Ok::<_, nexus_db_queries::db::TransactionError<()>>( - dsl::saga_node_event - .filter(dsl::event_type.eq(String::from("undo_failed"))) - .select(SagaNodeEvent::as_select()) - .load_async::<SagaNodeEvent>(&conn) - .await - .unwrap(), - ) + Ok(dsl::saga_node_event + .filter(dsl::event_type.eq(String::from("undo_failed"))) + .select(SagaNodeEvent::as_select()) + .load_async::<SagaNodeEvent>(&conn) + .await + .unwrap()) }) .await .unwrap(); From 5185ab947941e79abc2446ee12e5111e698fb3a9 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 6 Dec 2023 05:17:50 +0000 Subject: [PATCH 054/186] Update taiki-e/install-action digest to d140130 (#4622) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---|
[taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`d211c4b` -> `d140130`](https://togithub.com/taiki-e/install-action/compare/d211c4b...d140130) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 70b57a450a..0d1aec4c16 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@d211c4be5a95cbcd52a0870dda7d63a107a58368 # v2 + uses: taiki-e/install-action@d140130aeedb5a946a5769684d32e3a33539f226 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 001143cf3b686b8cb4a72916e9f775a0a186df9b Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 5 Dec 2023 21:23:47 -0800 Subject: [PATCH 055/186] Update Rust crate filetime to 0.2.23 (#4623) --- Cargo.lock | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 2 +- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13b3d6c74e..981dd99082 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2173,14 +2173,14 @@ checksum = "d0870c84016d4b481be5c9f323c24f65e31e901ae618f0e80f4308fb00de1d2d" [[package]] name = "filetime" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ "cfg-if 1.0.0", "libc", - "redox_syscall 0.3.5", - "windows-sys 0.48.0", + "redox_syscall 0.4.1", + "windows-sys 0.52.0", ] [[package]] @@ -9652,6 +9652,15 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-targets" version = "0.42.2" @@ -9682,6 +9691,21 @@ dependencies = [ "windows_x86_64_msvc 0.48.5", ] +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -9694,6 +9718,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -9706,6 +9736,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -9718,6 +9754,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -9730,6 +9772,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -9742,6 +9790,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -9754,6 +9808,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -9766,6 +9826,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "winnow" version = "0.5.15" diff --git a/Cargo.toml b/Cargo.toml index c88502bb1c..3a80367806 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -191,7 +191,7 @@ dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", either = "1.9.0" expectorate = "1.1.0" fatfs = "0.3.6" -filetime = "0.2.22" +filetime = "0.2.23" flate2 = "1.0.28" flume = "0.11.0" foreign-types = "0.3.2" From bcd7ac5d5e10355774310aa583aa59253eb880e5 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 6 Dec 2023 10:03:40 -0800 
Subject: [PATCH 056/186] [nexus] Bump query limit for uninitialized sleds (#4626) Fixes #4621, but in a quick-and-cheesy-feels-bad way that needs more thought to fix more elegantly. --- nexus/src/app/rack.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 95283faa1c..3804841feb 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -869,7 +869,17 @@ impl super::Nexus { ) -> ListResultVec<UninitializedSled> { debug!(self.log, "Getting latest collection"); // Grab the SPs from the last collection - let limit = NonZeroU32::new(50).unwrap(); + // + // We set a limit of 200 here to give us some breathing room when + // querying for cabooses and RoT pages, each of which is "4 per SP/RoT", + // which in a single fully populated rack works out to (32 sleds + 2 + // switches + 1 psc) * 4 = 140. + // + // This feels bad and probably needs more thought; see + // https://github.com/oxidecomputer/omicron/issues/4621 where this limit + // being too low bit us, and it will link to a more general followup + // issue. + let limit = NonZeroU32::new(200).unwrap(); let collection = self .db_datastore .inventory_get_latest_collection(opctx, limit) From 2a0595c70e16126d88a2b0140422cb052d8ce0ce Mon Sep 17 00:00:00 2001 From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:13:28 -0600 Subject: [PATCH 057/186] Bugfixes for #4589 and #4611 (#4610) Bugfix for issue #4589. The root cause is that `ensure_ipv4_nat_entry` previously would match against *any* existing table entries with the matching parameters. We need it to match only against entries that are *active*, or in implementation terms, entries whose `version_removed` column is `NULL`. The sequence of events triggering the bug is as follows: 1. User creates a new instance, eventually triggering the creation of new ipv4 nat entries, which are reconciled by the downstream dendrite workflow. 2. User stops the instance. This triggers the soft-deletion of the ipv4 nat entries, which are again reconciled by the downstream dendrite workflow. 3. The user restarts the instance. In the event that Nexus places the instance back on the same sled as last time, the `external_ip` may have the same parameters used by the soft-deleted nat records. Since we previously were not filtering for `version_removed IS NULL` in `ensure_ipv4_nat_entry`, the soft-deleted records would still be treated as "live" in our db query, causing Nexus to skip inserting new nat records when the instance restarts. This PR should resolve this unwanted behavior. However, a second issue was noticed during verification of the bug fix. I noticed that when running `swadm nat list`, the entries did not re-appear in the output even though `dendrite` was indeed picking up the new additions and configuring the softnpu asic accordingly. I believe this was also something @askfongjojo reported in chat. This means that we could have live entries on the switch and external traffic flowing to an instance, even though the nat entry is not appearing in `swadm nat list`. This PR also includes an upgraded dendrite that resolves that bug.
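In query terms, the fix narrows the "does this entry already exist?" check to live rows only. Below is a minimal illustrative sketch of the before/after filters; it assumes the table's Diesel `dsl` and a `nat_entry: Ipv4NatValues` binding, mirroring the real query in the diff that follows, and elides the select/result-handling plumbing:

    // Before (sketch): matches live *and* soft-deleted rows, so a restarted
    // instance whose external IP reuses the old parameters looks like it
    // already has a live NAT entry, and the new insert is skipped.
    let _existing_any = dsl::ipv4_nat_entry
        .filter(dsl::external_address.eq(nat_entry.external_address))
        .filter(dsl::first_port.eq(nat_entry.first_port))
        .filter(dsl::last_port.eq(nat_entry.last_port));

    // After (sketch): soft-deleted rows carry a non-NULL `version_removed`,
    // so requiring `version_removed IS NULL` means only active rows can
    // satisfy the match.
    let _existing_active = dsl::ipv4_nat_entry
        .filter(dsl::external_address.eq(nat_entry.external_address))
        .filter(dsl::first_port.eq(nat_entry.first_port))
        .filter(dsl::last_port.eq(nat_entry.last_port))
        .filter(dsl::version_removed.is_null());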
--- .../src/db/datastore/ipv4_nat_entry.rs | 150 +++++++++++++++++- nexus/src/app/sagas/instance_delete.rs | 31 ++++ package-manifest.toml | 12 +- tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 6 +- 5 files changed, 189 insertions(+), 12 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs index 274937b299..1caf5617bb 100644 --- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -36,6 +36,7 @@ impl DataStore { .filter(dsl::sled_address.eq(nat_entry.sled_address)) .filter(dsl::vni.eq(nat_entry.vni)) .filter(dsl::mac.eq(nat_entry.mac)) + .filter(dsl::version_removed.is_null()) .select(( dsl::external_address, dsl::first_port, @@ -275,7 +276,7 @@ mod test { use crate::db::datastore::datastore_test; use chrono::Utc; - use nexus_db_model::{Ipv4NatValues, MacAddr, Vni}; + use nexus_db_model::{Ipv4NatEntry, Ipv4NatValues, MacAddr, Vni}; use nexus_test_utils::db::test_setup_database; use omicron_common::api::external; use omicron_test_utils::dev; @@ -427,7 +428,6 @@ mod test { datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); assert_eq!(nat_entries.len(), 1); - // version should be unchanged assert_eq!( datastore.ipv4_nat_current_version(&opctx).await.unwrap(), @@ -437,4 +437,150 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + /// Table design and queries should only insert one active NAT entry for a given + /// set of properties, but allow multiple deleted nat entries for the same set + /// of properties. + async fn table_allows_unique_active_multiple_deleted() { + let logctx = dev::test_setup_log("test_nat_version_tracking"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // We should not have any NAT entries at this moment + let initial_state = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert!(initial_state.is_empty()); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 0 + ); + + // Each change (creation / deletion) to the NAT table should increment the + // version number of the row in the NAT table + let external_address = external::Ipv4Net( + ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(), + ); + + let sled_address = external::Ipv6Net( + ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(), + ); + + // Add a nat entry. + let nat1 = Ipv4NatValues { + external_address: external_address.into(), + first_port: 0.into(), + last_port: 999.into(), + sled_address: sled_address.into(), + vni: Vni(external::Vni::random()), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(), + ), + }; + + datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap(); + + // Try to add it again. It should still only result in a single entry. + datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap(); + let first_entry = datastore + .ipv4_nat_find_by_values(&opctx, nat1.clone()) + .await + .unwrap(); + + let nat_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone one change. One entry has been added, + // none deleted, so we should be at version 1. 
+ assert_eq!(nat_entries.len(), 1); + assert_eq!(nat_entries.last().unwrap().version_added, 1); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 1 + ); + + datastore.ipv4_nat_delete(&opctx, &first_entry).await.unwrap(); + + // The NAT table has undergone two changes. One entry has been added, + // then deleted, so we should be at version 2. + let nat_entries = datastore + .ipv4_nat_list_since_version(&opctx, 0, 10) + .await + .unwrap() + .into_iter(); + + let active: Vec<Ipv4NatEntry> = nat_entries + .clone() + .filter(|entry| entry.version_removed.is_none()) + .collect(); + + let inactive: Vec<Ipv4NatEntry> = nat_entries + .filter(|entry| entry.version_removed.is_some()) + .collect(); + + assert!(active.is_empty()); + assert_eq!(inactive.len(), 1); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 2 + ); + + // Add the same entry back. This simulates the behavior we will see + // when stopping and then restarting an instance. + datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap(); + + // The NAT table has undergone three changes. + let nat_entries = datastore + .ipv4_nat_list_since_version(&opctx, 0, 10) + .await + .unwrap() + .into_iter(); + + let active: Vec<Ipv4NatEntry> = nat_entries + .clone() + .filter(|entry| entry.version_removed.is_none()) + .collect(); + + let inactive: Vec<Ipv4NatEntry> = nat_entries + .filter(|entry| entry.version_removed.is_some()) + .collect(); + + assert_eq!(active.len(), 1); + assert_eq!(inactive.len(), 1); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 3 + ); + + let second_entry = + datastore.ipv4_nat_find_by_values(&opctx, nat1).await.unwrap(); + datastore.ipv4_nat_delete(&opctx, &second_entry).await.unwrap(); + + // The NAT table has undergone four changes + let nat_entries = datastore + .ipv4_nat_list_since_version(&opctx, 0, 10) + .await + .unwrap() + .into_iter(); + + let active: Vec<Ipv4NatEntry> = nat_entries + .clone() + .filter(|entry| entry.version_removed.is_none()) + .collect(); + + let inactive: Vec<Ipv4NatEntry> = nat_entries + .filter(|entry| entry.version_removed.is_some()) + .collect(); + + assert_eq!(active.len(), 0); + assert_eq!(inactive.len(), 2); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 4 + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 1605465c74..e35b922c87 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -8,6 +8,7 @@ use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; +use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::{authn, authz, db}; use omicron_common::api::external::{Error, ResourceType}; use omicron_common::api::internal::shared::SwitchLocation; @@ -39,6 +40,9 @@ declare_saga_actions!
{ DEALLOCATE_EXTERNAL_IP -> "no_result3" { + sid_deallocate_external_ip } + INSTANCE_DELETE_NAT -> "no_result4" { + + sid_delete_nat + } } // instance delete saga: definition @@ -57,6 +61,7 @@ impl NexusSaga for SagaInstanceDelete { _params: &Self::Params, mut builder: steno::DagBuilder, ) -> Result<steno::Dag, super::SagaInitError> { + builder.append(instance_delete_nat_action()); builder.append(instance_delete_record_action()); builder.append(delete_network_interfaces_action()); builder.append(deallocate_external_ip_action()); @@ -110,6 +115,32 @@ async fn sid_delete_network_interfaces( Ok(()) } +async fn sid_delete_nat( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::<Params>()?; + let instance_id = params.authz_instance.id(); + let osagactx = sagactx.user_data(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &params.serialized_authn, + ); + + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .instance_delete_dpd_config(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + async fn sid_deallocate_external_ip( sagactx: NexusActionContext, ) -> Result<(), ActionError> { diff --git a/package-manifest.toml b/package-manifest.toml index 3bce4aafee..37ae1100f8 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36" -source.sha256 = "dc93b671cce54e83ed55faaa267f81ba9e65abcd6714aa559d68a8783d73b1c1" +source.commit = "1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" +source.sha256 = "06b5eeedaebf30e96a5c5e932e08034c90947af7a54e9bc04d57d6807013ade9" output.type = "zone" output.intermediate_only = true @@ -501,8 +501,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36" -source.sha256 = "c34b10d47fa3eb9f9f6b3655ea4ed8a726f93399ea177efea79f5c89f2ab5a1e" +source.commit = "1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" +source.sha256 = "51be0b0342bc7cdf927797af45af3bc82861bb8efb174d50958cb16b5620c51d" output.type = "zone" output.intermediate_only = true @@ -519,8 +519,8 @@ only_for_targets.image = "standard" # 2.
Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36" -source.sha256 = "ce7065227c092ee82704f39a966b7441e3ae82d75eedb6eb281bd8b3e5873e32" +source.commit = "1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" +source.sha256 = "9afb24cdae27755eaf86a856268686bb641048b5d450dae858cf47b9daaa46ed" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index c2dda4dbd0..b6dc45a8d0 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="2af6adea85c62ac37e451148b84e5eb0ef005f36" +COMMIT="1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 77ee198fc5..95f04db9e8 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="dc93b671cce54e83ed55faaa267f81ba9e65abcd6714aa559d68a8783d73b1c1" -CIDL_SHA256_LINUX_DPD="b13b391a085ba6bf16fdd99774f64c9d53cd7220ad518d5839c8558fb925c40c" -CIDL_SHA256_LINUX_SWADM="6bfa4e367eb2b0be89f1588ac458026a186314597a4feb9fee6cea60101c7ebe" +CIDL_SHA256_ILLUMOS="06b5eeedaebf30e96a5c5e932e08034c90947af7a54e9bc04d57d6807013ade9" +CIDL_SHA256_LINUX_DPD="99a800cbd5739245154831004892d47be5a871e37c536ec3009911ddb02fdb16" +CIDL_SHA256_LINUX_SWADM="e92bfc071f3944523a2e69b13ee877a4fd87cb8a9a78011b4aa8f40218347e25" From 1a3443cccfbf4428397e9e9c2abe448da19d735d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 6 Dec 2023 23:22:41 +0000 Subject: [PATCH 058/186] (Multiple) Floating IP Support (#4559) --- .github/buildomat/jobs/deploy.sh | 2 +- Cargo.lock | 220 +++++---- Cargo.toml | 4 +- common/src/api/external/mod.rs | 1 + illumos-utils/src/opte/port_manager.rs | 41 +- nexus/db-model/src/external_ip.rs | 140 ++++++ nexus/db-model/src/schema.rs | 23 +- nexus/db-queries/src/authz/api_resources.rs | 8 + nexus/db-queries/src/authz/oso_generic.rs | 1 + .../src/authz/policy_test/resources.rs | 7 + .../src/db/datastore/external_ip.rs | 262 ++++++++++- nexus/db-queries/src/db/datastore/mod.rs | 313 +++++++------ nexus/db-queries/src/db/lookup.rs | 16 +- .../db-queries/src/db/queries/external_ip.rs | 10 +- nexus/db-queries/tests/output/authz-roles.out | 42 ++ nexus/src/app/external_ip.rs | 87 ++++ nexus/src/app/instance.rs | 45 +- nexus/src/app/mod.rs | 9 +- nexus/src/app/sagas/instance_create.rs | 77 +++- nexus/src/app/sagas/instance_delete.rs | 5 + nexus/src/external_api/http_entrypoints.rs | 125 +++++ nexus/src/external_api/tag-config.json | 6 + nexus/test-utils/src/resource_helpers.rs | 25 + nexus/tests/integration_tests/endpoints.rs | 40 ++ nexus/tests/integration_tests/external_ips.rs | 432 ++++++++++++++++++ nexus/tests/integration_tests/instances.rs | 157 ++++++- nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/unauthorized.rs | 6 + nexus/tests/output/nexus_tags.txt | 7 + nexus/types/src/external_api/params.rs | 32 +- nexus/types/src/external_api/views.rs | 16 + openapi/nexus.json | 334 ++++++++++++++ openapi/sled-agent.json | 17 +- schema/crdb/19.0.0/up01.sql | 1 + schema/crdb/19.0.0/up02.sql | 4 + schema/crdb/19.0.0/up03.sql | 6 + schema/crdb/19.0.0/up04.sql | 7 + schema/crdb/19.0.0/up05.sql | 19 + schema/crdb/19.0.0/up06.sql | 3 + schema/crdb/dbinit.sql | 50 +- sled-agent/src/instance.rs | 19 +- 
sled-agent/src/params.rs | 3 +- sled-agent/src/services.rs | 8 +- tools/opte_version | 2 +- 44 files changed, 2339 insertions(+), 294 deletions(-) create mode 100644 nexus/tests/integration_tests/external_ips.rs create mode 100644 schema/crdb/19.0.0/up01.sql create mode 100644 schema/crdb/19.0.0/up02.sql create mode 100644 schema/crdb/19.0.0/up03.sql create mode 100644 schema/crdb/19.0.0/up04.sql create mode 100644 schema/crdb/19.0.0/up05.sql create mode 100644 schema/crdb/19.0.0/up06.sql diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index ff9b44fc40..3c4b3d88c8 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.25" +#: target = "lab-2.0-opte-0.27" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", diff --git a/Cargo.lock b/Cargo.lock index 981dd99082..a126f82300 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,7 +33,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cipher", "cpufeatures", ] @@ -58,7 +58,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "once_cell", "version_check", ] @@ -381,7 +381,7 @@ checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" dependencies = [ "addr2line", "cc", - "cfg-if 1.0.0", + "cfg-if", "libc", "miniz_oxide", "object 0.32.1", @@ -847,12 +847,6 @@ dependencies = [ "nom", ] -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -865,7 +859,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cipher", "cpufeatures", ] @@ -1003,6 +997,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + [[package]] name = "colorchoice" version = "1.0.0" @@ -1125,7 +1125,7 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -1190,7 +1190,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-channel", "crossbeam-deque", "crossbeam-epoch", @@ -1204,7 +1204,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", ] @@ -1214,7 
+1214,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] @@ -1226,7 +1226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" dependencies = [ "autocfg", - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", "memoffset 0.9.0", "scopeguard", @@ -1238,7 +1238,7 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", ] @@ -1248,7 +1248,7 @@ version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -1364,7 +1364,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6bd9c8e659a473bce955ae5c35b116af38af11a7acb0b480e01f3ed348aeb40" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "memchr", ] @@ -1383,7 +1383,7 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "622178105f911d937a42cdb140730ba4a3ed2becd8ae6ce39c7d28b5d75d4588" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", "curve25519-dalek-derive", "digest", @@ -1482,7 +1482,7 @@ version = "5.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd72493923899c6f10c641bdbdeddc7183d6396641d99c1a0d1597f37f92e28" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "hashbrown 0.14.2", "lock_api", "once_cell", @@ -1550,6 +1550,38 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe7ed1d93f4553003e20b629abe9085e1e81b1429520f897f8f8860bc6dfc21" +[[package]] +name = "defmt" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8a2d011b2fee29fb7d659b83c43fce9a2cb4df453e16d441a51448e448f3f98" +dependencies = [ + "bitflags 1.3.2", + "defmt-macros", +] + +[[package]] +name = "defmt-macros" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f0216f6c5acb5ae1a47050a6645024e6edafc2ee32d421955eccfef12ef92e" +dependencies = [ + "defmt-parser", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.32", +] + +[[package]] +name = "defmt-parser" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269924c02afd7f94bc4cecbfa5c379f6ffcf9766b3408fe63d22c728654eccd0" +dependencies = [ + "thiserror", +] + [[package]] name = "der" version = "0.7.8" @@ -1729,7 +1761,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "dirs-sys-next", ] @@ -2007,6 +2039,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + [[package]] name = "ena" version = "0.14.2" @@ -2028,7 +2066,7 @@ version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -2150,7 +2188,7 @@ version = "3.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "rustix 0.38.25", "windows-sys 0.48.0", ] @@ -2177,7 +2215,7 @@ version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall 0.4.1", "windows-sys 0.52.0", @@ -2554,7 +2592,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "wasi 0.9.0+wasi-snapshot-preview1", ] @@ -2565,7 +2603,7 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", @@ -3061,7 +3099,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" +source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" [[package]] name = "illumos-utils" @@ -3073,7 +3111,7 @@ dependencies = [ "byteorder", "camino", "camino-tempfile", - "cfg-if 1.0.0", + "cfg-if", "crucible-smf", "futures", "ipnetwork", @@ -3276,7 +3314,7 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -3464,10 +3502,10 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" +source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" dependencies = [ "quote", - "syn 1.0.109", + "syn 2.0.32", ] [[package]] @@ -3557,7 +3595,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "winapi", ] @@ -3573,7 +3611,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/netadm-sys#f114bd0d543d886cd453932e9f0967de57289bc2" dependencies = [ "anyhow", - "cfg-if 1.0.0", + "cfg-if", "colored", "dlpi", "libc", @@ -3852,7 +3890,7 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c84490118f2ee2d74570d114f3d0493cbf02790df303d2707606c3e14e07c96" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "downcast", "fragile", "lazy_static", @@ -3867,7 +3905,7 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ce75669015c4f47b289fd4d4f56e894e4c96003ffdf3ac51313126f94c6cbb" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "proc-macro2", "quote", "syn 1.0.109", @@ -4200,7 +4238,7 @@ version = "0.26.2" source 
= "git+https://github.com/jgallagher/nix?branch=r0.26-illumos#c1a3636db0524f194b714cfd117cd9b637b8b10e" dependencies = [ "bitflags 1.3.2", - "cfg-if 1.0.0", + "cfg-if", "libc", "memoffset 0.7.1", "pin-utils", @@ -4808,7 +4846,7 @@ dependencies = [ "camino", "camino-tempfile", "cancel-safe-futures", - "cfg-if 1.0.0", + "cfg-if", "chrono", "clap 4.4.3", "crucible-agent-client", @@ -5093,7 +5131,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" dependencies = [ "bitflags 2.4.0", - "cfg-if 1.0.0", + "cfg-if", "foreign-types 0.3.2", "libc", "once_cell", @@ -5133,37 +5171,35 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" +source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" dependencies = [ - "cfg-if 0.1.10", + "cfg-if", "dyn-clone", "illumos-sys-hdrs", "kstat-macro", "opte-api", "postcard", "serde", - "smoltcp 0.8.2", + "smoltcp 0.10.0", "version_check", - "zerocopy 0.6.4", ] [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" +source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" dependencies = [ - "cfg-if 0.1.10", "illumos-sys-hdrs", "ipnetwork", "postcard", "serde", - "smoltcp 0.8.2", + "smoltcp 0.10.0", ] [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" +source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" dependencies = [ "libc", "libnet", @@ -5237,14 +5273,13 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=258a8b59902dd36fc7ee5425e6b1fb5fc80d4649#258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" +source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" dependencies = [ - "cfg-if 0.1.10", "illumos-sys-hdrs", "opte", "serde", - "smoltcp 0.8.2", - "zerocopy 0.6.4", + "smoltcp 0.10.0", + "zerocopy 0.7.26", ] [[package]] @@ -5363,7 +5398,7 @@ dependencies = [ name = "oximeter-instruments" version = "0.1.0" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "chrono", "dropshot", "futures", @@ -5471,7 +5506,7 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "instant", "libc", "redox_syscall 0.2.16", @@ -5485,7 +5520,7 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall 0.3.5", "smallvec 1.11.0", @@ -5834,7 +5869,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d52cff9d1d4dee5fe6d03729099f4a310a41179e0a10dbf542039873f2e826fb" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", 
"opaque-debug", "universal-hash", @@ -5848,20 +5883,15 @@ checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b" [[package]] name = "postcard" -version = "0.7.3" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a25c0b0ae06fcffe600ad392aabfa535696c8973f2253d9ac83171924c58a858" +checksum = "a55c51ee6c0db07e68448e336cf8ea4131a620edefebf9893e759b2d793420f8" dependencies = [ - "postcard-cobs", + "cobs", + "embedded-io", "serde", ] -[[package]] -name = "postcard-cobs" -version = "0.1.5-pre" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c68cb38ed13fd7bc9dd5db8f165b7c8d9c1a315104083a2b10f11354c2af97f" - [[package]] name = "postgres-protocol" version = "0.6.6" @@ -6702,7 +6732,7 @@ version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "glob", "proc-macro2", "quote", @@ -7369,7 +7399,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", "digest", ] @@ -7380,7 +7410,7 @@ version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", "digest", ] @@ -7541,7 +7571,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", - "cfg-if 1.0.0", + "cfg-if", "futures", "illumos-devinfo", "illumos-utils", @@ -7569,7 +7599,7 @@ dependencies = [ "async-trait", "camino", "camino-tempfile", - "cfg-if 1.0.0", + "cfg-if", "derive_more", "glob", "illumos-utils", @@ -7726,24 +7756,27 @@ dependencies = [ [[package]] name = "smoltcp" -version = "0.8.2" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee34c1e1bfc7e9206cc0fb8030a90129b4e319ab53856249bb27642cab914fb3" +checksum = "7e9786ac45091b96f946693e05bfa4d8ca93e2d3341237d97a380107a6b38dea" dependencies = [ "bitflags 1.3.2", "byteorder", + "cfg-if", + "heapless", "managed", ] [[package]] name = "smoltcp" -version = "0.9.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9786ac45091b96f946693e05bfa4d8ca93e2d3341237d97a380107a6b38dea" +checksum = "8d2e3a36ac8fea7b94e666dfa3871063d6e0a5c9d5d4fec9a1a6b7b6760f0229" dependencies = [ "bitflags 1.3.2", "byteorder", - "cfg-if 1.0.0", + "cfg-if", + "defmt", "heapless", "managed", ] @@ -8178,7 +8211,7 @@ version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "fastrand", "redox_syscall 0.4.1", "rustix 0.38.25", @@ -8319,7 +8352,7 @@ version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "once_cell", ] @@ -8685,7 +8718,7 @@ version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -8718,7 +8751,7 @@ version = "0.22.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "6c408c32e6a9dbb38037cece35740f2cf23c875d8ca134d33631cec83f74d3fe" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "data-encoding", "futures-channel", "futures-util", @@ -8739,7 +8772,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f7f83d1e4a0e4358ac54c5c3681e5d7da5efc5a7a632c90bb6d6669ddd9bc26" dependencies = [ "async-trait", - "cfg-if 1.0.0", + "cfg-if", "data-encoding", "enum-as-inner", "futures-channel", @@ -8763,7 +8796,7 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aff21aa4dcefb0a1afbfac26deb0adc93888c7d295fb63ab273ef276ba2b7cfe" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "futures-util", "ipconfig", "lazy_static", @@ -8785,7 +8818,7 @@ checksum = "99022f9befa6daec2a860be68ac28b1f0d9d7ccf441d8c5a695e35a58d88840d" dependencies = [ "async-trait", "bytes", - "cfg-if 1.0.0", + "cfg-if", "enum-as-inner", "futures-executor", "futures-util", @@ -9300,7 +9333,7 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "wasm-bindgen-macro", ] @@ -9325,7 +9358,7 @@ version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "wasm-bindgen", "web-sys", @@ -9847,7 +9880,7 @@ version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "windows-sys 0.48.0", ] @@ -9928,6 +9961,16 @@ dependencies = [ "zerocopy-derive 0.6.4", ] +[[package]] +name = "zerocopy" +version = "0.7.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0" +dependencies = [ + "byteorder", + "zerocopy-derive 0.7.26", +] + [[package]] name = "zerocopy-derive" version = "0.2.0" @@ -9950,6 +9993,17 @@ dependencies = [ "syn 2.0.32", ] +[[package]] +name = "zerocopy-derive" +version = "0.7.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.32", +] + [[package]] name = "zeroize" version = "1.7.0" diff --git a/Cargo.toml b/Cargo.toml index 3a80367806..c0935aec6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -260,7 +260,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.9.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "258a8b59902dd36fc7ee5425e6b1fb5fc80d4649", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "24ceba1969269e4d81bda83d8968d7d7f713c46b", features = [ "api", "std" ] } once_cell = "1.18.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0-rc.1" @@ -268,7 +268,7 @@ openapiv3 = "2.0.0-rc.1" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = 
"24ceba1969269e4d81bda83d8968d7d7f713c46b" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index db5272cd6e..50516a5da4 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -752,6 +752,7 @@ pub enum ResourceType { Zpool, Vmm, Ipv4NatEntry, + FloatingIp, } // IDENTITY METADATA diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index f0a8d8d839..3558ef1c78 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -20,6 +20,7 @@ use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_common::api::internal::shared::SourceNatConfig; use oxide_vpc::api::AddRouterEntryReq; use oxide_vpc::api::DhcpCfg; +use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::IpCfg; use oxide_vpc::api::IpCidr; use oxide_vpc::api::Ipv4Cfg; @@ -99,7 +100,8 @@ impl PortManager { &self, nic: &NetworkInterface, source_nat: Option, - external_ips: &[IpAddr], + ephemeral_ip: Option, + floating_ips: &[IpAddr], firewall_rules: &[VpcFirewallRule], dhcp_config: DhcpCfg, ) -> Result<(Port, PortTicket), Error> { @@ -111,13 +113,6 @@ impl PortManager { let boundary_services = default_boundary_services(); // Describe the external IP addresses for this port. - // - // Note that we're currently only taking the first address, which is all - // that OPTE supports. The array is guaranteed to be limited by Nexus. - // See https://github.com/oxidecomputer/omicron/issues/1467 - // See https://github.com/oxidecomputer/opte/issues/196 - let external_ip = external_ips.get(0); - macro_rules! ip_cfg { ($ip:expr, $log_prefix:literal, $ip_t:path, $cidr_t:path, $ipcfg_e:path, $ipcfg_t:ident, $snat_t:ident) => {{ @@ -152,25 +147,43 @@ impl PortManager { } None => None, }; - let external_ip = match external_ip { - Some($ip_t(ip)) => Some((*ip).into()), + let ephemeral_ip = match ephemeral_ip { + Some($ip_t(ip)) => Some(ip.into()), Some(_) => { error!( self.inner.log, - concat!($log_prefix, " external IP"); - "external_ip" => ?external_ip, + concat!($log_prefix, " ephemeral IP"); + "ephemeral_ip" => ?ephemeral_ip, ); return Err(Error::InvalidPortIpConfig); } None => None, }; + let floating_ips: Vec<_> = floating_ips + .iter() + .copied() + .map(|ip| match ip { + $ip_t(ip) => Ok(ip.into()), + _ => { + error!( + self.inner.log, + concat!($log_prefix, " ephemeral IP"); + "ephemeral_ip" => ?ephemeral_ip, + ); + Err(Error::InvalidPortIpConfig) + } + }) + .collect::, _>>()?; $ipcfg_e($ipcfg_t { vpc_subnet, private_ip: $ip.into(), gateway_ip: gateway_ip.into(), - snat, - external_ips: external_ip, + external_ips: ExternalIpCfg { + ephemeral_ip, + snat, + floating_ips, + }, }) }} } diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 1152e0109c..1a755f0396 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -7,10 +7,12 @@ use crate::impl_enum_type; use crate::schema::external_ip; +use crate::schema::floating_ip; use crate::Name; use crate::SqlU16; use chrono::DateTime; use chrono::Utc; +use db_macros::Resource; use diesel::Queryable; use diesel::Selectable; use ipnetwork::IpNetwork; @@ -18,6 +20,9 @@ use nexus_types::external_api::shared; use nexus_types::external_api::views; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::Error; +use omicron_common::api::external::IdentityMetadata; +use serde::Deserialize; 
+use serde::Serialize; use std::convert::TryFrom; use std::net::IpAddr; use uuid::Uuid; @@ -69,6 +74,30 @@ pub struct ExternalIp { pub ip: IpNetwork, pub first_port: SqlU16, pub last_port: SqlU16, + // Only Some(_) for instance Floating IPs + pub project_id: Option<Uuid>, +} + +/// A view type constructed from `ExternalIp` used to represent Floating IP +/// objects in user-facing APIs. +/// +/// This View type fills a similar niche to `ProjectImage` etc.: we need to +/// represent identity as non-nullable (ditto for parent project) so as to +/// play nicely with authz and resource APIs. +#[derive( + Queryable, Selectable, Clone, Debug, Resource, Serialize, Deserialize, +)] +#[diesel(table_name = floating_ip)] +pub struct FloatingIp { + #[diesel(embed)] + pub identity: FloatingIpIdentity, + + pub ip_pool_id: Uuid, + pub ip_pool_range_id: Uuid, + pub is_service: bool, + pub parent_id: Option<Uuid>, + pub ip: IpNetwork, + pub project_id: Uuid, } impl From<ExternalIp> for sled_agent_client::types::SourceNatConfig { @@ -93,6 +122,7 @@ pub struct IncompleteExternalIp { is_service: bool, parent_id: Option<Uuid>, pool_id: Uuid, + project_id: Option<Uuid>, // Optional address requesting that a specific IP address be allocated. explicit_ip: Option<IpNetwork>, // Optional range when requesting a specific SNAT range be allocated. @@ -114,6 +144,7 @@ impl IncompleteExternalIp { is_service: false, parent_id: Some(instance_id), pool_id, + project_id: None, explicit_ip: None, explicit_port_range: None, } @@ -129,6 +160,7 @@ impl IncompleteExternalIp { is_service: false, parent_id: Some(instance_id), pool_id, + project_id: None, explicit_ip: None, explicit_port_range: None, } @@ -138,6 +170,7 @@ impl IncompleteExternalIp { id: Uuid, name: &Name, description: &str, + project_id: Uuid, pool_id: Uuid, ) -> Self { Self { @@ -149,11 +182,35 @@ impl IncompleteExternalIp { is_service: false, parent_id: None, pool_id, + project_id: Some(project_id), explicit_ip: None, explicit_port_range: None, } } + pub fn for_floating_explicit( + id: Uuid, + name: &Name, + description: &str, + project_id: Uuid, + explicit_ip: IpAddr, + pool_id: Uuid, + ) -> Self { + Self { + id, + name: Some(name.clone()), + description: Some(description.to_string()), + time_created: Utc::now(), + kind: IpKind::Floating, + is_service: false, + parent_id: None, + pool_id, + project_id: Some(project_id), + explicit_ip: Some(explicit_ip.into()), + explicit_port_range: None, + } + } + pub fn for_service_explicit( id: Uuid, name: &Name, @@ -171,6 +228,7 @@ impl IncompleteExternalIp { is_service: true, parent_id: Some(service_id), pool_id, + project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range: None, } @@ -199,6 +257,7 @@ impl IncompleteExternalIp { is_service: true, parent_id: Some(service_id), pool_id, + project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range, } @@ -220,6 +279,7 @@ impl IncompleteExternalIp { is_service: true, parent_id: Some(service_id), pool_id, + project_id: None, explicit_ip: None, explicit_port_range: None, } @@ -235,6 +295,7 @@ impl IncompleteExternalIp { is_service: true, parent_id: Some(service_id), pool_id, + project_id: None, explicit_ip: None, explicit_port_range: None, } @@ -272,6 +333,10 @@ impl IncompleteExternalIp { &self.pool_id } + pub fn project_id(&self) -> &Option<Uuid> { + &self.project_id + } + pub fn explicit_ip(&self) -> &Option<IpNetwork> { &self.explicit_ip } @@ -308,3 +373,78 @@ impl TryFrom<ExternalIp> for views::ExternalIp { Ok(views::ExternalIp { kind, ip: ip.ip.ip() }) } } + +impl TryFrom<ExternalIp> for FloatingIp { + type Error =
Error; + + fn try_from(ip: ExternalIp) -> Result<Self, Self::Error> { + if ip.kind != IpKind::Floating { + return Err(Error::internal_error( + "attempted to convert non-floating external IP to floating", + )); + } + if ip.is_service { + return Err(Error::internal_error( + "Service IPs should not be exposed in the API", + )); + } + + let project_id = ip.project_id.ok_or(Error::internal_error( + "database schema guarantees parent project for non-service FIP", + ))?; + + let name = ip.name.ok_or(Error::internal_error( + "database schema guarantees ID metadata for non-service FIP", + ))?; + + let description = ip.description.ok_or(Error::internal_error( + "database schema guarantees ID metadata for non-service FIP", + ))?; + + let identity = FloatingIpIdentity { + id: ip.id, + name, + description, + time_created: ip.time_created, + time_modified: ip.time_modified, + time_deleted: ip.time_deleted, + }; + + Ok(FloatingIp { + ip: ip.ip, + identity, + project_id, + ip_pool_id: ip.ip_pool_id, + ip_pool_range_id: ip.ip_pool_range_id, + is_service: ip.is_service, + parent_id: ip.parent_id, + }) + } +} + +impl TryFrom<ExternalIp> for views::FloatingIp { + type Error = Error; + + fn try_from(ip: ExternalIp) -> Result<Self, Self::Error> { + FloatingIp::try_from(ip).map(Into::into) + } +} + +impl From<FloatingIp> for views::FloatingIp { + fn from(ip: FloatingIp) -> Self { + let identity = IdentityMetadata { + id: ip.identity.id, + name: ip.identity.name.into(), + description: ip.identity.description, + time_created: ip.identity.time_created, + time_modified: ip.identity.time_modified, + }; + + views::FloatingIp { + ip: ip.ip.ip(), + identity, + project_id: ip.project_id, + instance_id: ip.parent_id, + } + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 373785799e..51501b4894 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -525,6 +525,7 @@ table! { time_created -> Timestamptz, time_modified -> Timestamptz, time_deleted -> Nullable<Timestamptz>, + ip_pool_id -> Uuid, ip_pool_range_id -> Uuid, is_service -> Bool, @@ -533,6 +534,26 @@ table! { ip -> Inet, first_port -> Int4, last_port -> Int4, + + project_id -> Nullable<Uuid>, } } + +table! { + floating_ip (id) { + id -> Uuid, + name -> Text, + description -> Text, + time_created -> Timestamptz, + time_modified -> Timestamptz, + time_deleted -> Nullable<Timestamptz>, + + ip_pool_id -> Uuid, + ip_pool_range_id -> Uuid, + is_service -> Bool, + parent_id -> Nullable<Uuid>, + ip -> Inet, + project_id -> Uuid, } } @@ -1301,7 +1322,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(18, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(19, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index b22fe1ac25..2dfe2f7174 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -791,6 +791,14 @@ authz_resource! { polar_snippet = InProject, } +authz_resource! { + name = "FloatingIp", + parent = "Project", + primary_key = Uuid, + roles_allowed = false, + polar_snippet = InProject, +} + // Customer network integration resources nested below "Fleet" +authz_resource!
{ diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/db-queries/src/authz/oso_generic.rs index e642062ead..6098379287 100644 --- a/nexus/db-queries/src/authz/oso_generic.rs +++ b/nexus/db-queries/src/authz/oso_generic.rs @@ -131,6 +131,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result<OsoInit, anyhow::Error> { VpcRouter::init(), RouterRoute::init(), VpcSubnet::init(), + FloatingIp::init(), // Silo-level resources Image::init(), SiloImage::init(), diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs index 3049f3b9bf..8bdd97923b 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/authz/policy_test/resources.rs @@ -319,6 +319,13 @@ async fn make_project( Uuid::new_v4(), LookupType::ByName(image_name), )); + + let floating_ip_name = format!("{project_name}-fip1"); + builder.new_resource(authz::FloatingIp::new( + project.clone(), + Uuid::new_v4(), + LookupType::ByName(floating_ip_name), + )); } /// Returns the set of authz classes exempted from the coverage test diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 4e34bfc15c..e821082501 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -15,9 +15,11 @@ use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; use crate::db::lookup::LookupPath; use crate::db::model::ExternalIp; +use crate::db::model::FloatingIp; use crate::db::model::IncompleteExternalIp; use crate::db::model::IpKind; use crate::db::model::Name; +use crate::db::pagination::paginated; use crate::db::pool::DbConnection; use crate::db::queries::external_ip::NextExternalIp; use crate::db::update_and_check::UpdateAndCheck; @@ -25,10 +27,18 @@ use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_types::external_api::params; use nexus_types::identity::Resource; +use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; +use omicron_common::api::external::NameOrId; +use omicron_common::api::external::ResourceType; +use omicron_common::api::external::UpdateResult; +use ref_cast::RefCast; use std::net::IpAddr; use uuid::Uuid; @@ -128,6 +138,56 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } + /// Allocates a floating IP address for instance usage. + pub async fn allocate_floating_ip( + &self, + opctx: &OpContext, + project_id: Uuid, + params: params::FloatingIpCreate, + ) -> CreateResult<ExternalIp> { + let ip_id = Uuid::new_v4(); + + let pool_id = match params.pool { + Some(NameOrId::Name(name)) => { + LookupPath::new(opctx, self) + .ip_pool_name(&Name(name)) + .fetch_for(authz::Action::Read) + .await? + .1 + } + Some(NameOrId::Id(id)) => { + LookupPath::new(opctx, self) + .ip_pool_id(id) + .fetch_for(authz::Action::Read) + .await?
+ .1 + } + None => self.ip_pools_fetch_default(opctx).await?, } .id(); + + let data = if let Some(ip) = params.address { + IncompleteExternalIp::for_floating_explicit( + ip_id, + &Name(params.identity.name), + &params.identity.description, + project_id, + ip, + pool_id, + ) + } else { + IncompleteExternalIp::for_floating( + ip_id, + &Name(params.identity.name), + &params.identity.description, + project_id, + pool_id, + ) + }; + + self.allocate_external_ip(opctx, data).await + } + async fn allocate_external_ip( &self, opctx: &OpContext, @@ -144,8 +204,13 @@ impl DataStore { conn: &async_bb8_diesel::Connection<DbConnection>, data: IncompleteExternalIp, ) -> Result<ExternalIp, TransactionError<Error>> { + use diesel::result::DatabaseErrorKind::UniqueViolation; + // Name needs to be cloned out here (if present) to give users a + // sensible error message on name collision. + let name = data.name().clone(); let explicit_ip = data.explicit_ip().is_some(); NextExternalIp::new(data).get_result_async(conn).await.map_err(|e| { + use diesel::result::Error::DatabaseError; use diesel::result::Error::NotFound; match e { NotFound => { @@ -159,6 +224,17 @@ impl DataStore { )) } } + DatabaseError(UniqueViolation, ..) if name.is_some() => { + TransactionError::CustomError(public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::FloatingIp, + name.as_ref() + .map(|m| m.as_str()) + .unwrap_or_default(), + ), + )) + } + _ => { if retryable(&e) { return TransactionError::Database(e); } @@ -255,8 +331,6 @@ impl DataStore { /// This method returns the number of records deleted, rather than the usual /// `DeleteResult`. That's mostly useful for tests, but could be important /// if callers have some invariants they'd like to check. - // TODO-correctness: This can't be used for Floating IPs, we'll need a - // _detatch_ method for that. pub async fn deallocate_external_ip_by_instance_id( &self, opctx: &OpContext, @@ -275,6 +349,27 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Detach all Floating IP addresses from their parent instance. + /// + /// As in `deallocate_external_ip_by_instance_id`, this method returns the + /// number of records altered, rather than an `UpdateResult`. + pub async fn detach_floating_ips_by_instance_id( + &self, + opctx: &OpContext, + instance_id: Uuid, + ) -> Result<usize, Error> { + use db::schema::external_ip::dsl; + diesel::update(dsl::external_ip) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::is_service.eq(false)) + .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::kind.eq(IpKind::Floating)) + .set(dsl::parent_id.eq(Option::<Uuid>::None)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Fetch all external IP addresses of any kind for the provided instance pub async fn instance_lookup_external_ips( &self, @@ -291,4 +386,167 @@ impl DataStore { .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + + /// Fetch all Floating IP addresses for the provided project.
+ pub async fn floating_ips_list( + &self, + opctx: &OpContext, + authz_project: &authz::Project, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec<FloatingIp> { + use db::schema::floating_ip::dsl; + + opctx.authorize(authz::Action::ListChildren, authz_project).await?; + + match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(dsl::floating_ip, dsl::id, &pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + dsl::floating_ip, + dsl::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .filter(dsl::project_id.eq(authz_project.id())) + .filter(dsl::time_deleted.is_null()) + .select(FloatingIp::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Delete a Floating IP, verifying first that it is not in use. + pub async fn floating_ip_delete( + &self, + opctx: &OpContext, + authz_fip: &authz::FloatingIp, + db_fip: &FloatingIp, + ) -> DeleteResult { + use db::schema::external_ip::dsl; + + // Verify this FIP is not attached to any instances/services. + if db_fip.parent_id.is_some() { + return Err(Error::invalid_request( + "Floating IP cannot be deleted while attached to an instance", + )); + } + + opctx.authorize(authz::Action::Delete, authz_fip).await?; + + let now = Utc::now(); + let updated_rows = diesel::update(dsl::external_ip) + .filter(dsl::id.eq(db_fip.id())) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.is_null()) + .set(dsl::time_deleted.eq(now)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_fip), + ) + })?; + + if updated_rows == 0 { + return Err(Error::InvalidRequest { + message: "deletion failed due to concurrent modification" + .to_string(), + }); + } + Ok(()) + } + + /// Attaches a Floating IP address to an instance. + pub async fn floating_ip_attach( + &self, + opctx: &OpContext, + authz_fip: &authz::FloatingIp, + db_fip: &FloatingIp, + instance_id: Uuid, + ) -> UpdateResult<FloatingIp> { + use db::schema::external_ip::dsl; + + // Verify this FIP is not attached to any instances/services. + if db_fip.parent_id.is_some() { + return Err(Error::invalid_request( + "Floating IP cannot be attached to one instance while still attached to another", + )); + } + + let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + .instance_id(instance_id) + .fetch_for(authz::Action::Modify) + .await?; + + opctx.authorize(authz::Action::Modify, authz_fip).await?; + opctx.authorize(authz::Action::Modify, &authz_instance).await?; + + diesel::update(dsl::external_ip) + .filter(dsl::id.eq(db_fip.id())) + .filter(dsl::kind.eq(IpKind::Floating)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.is_null()) + .set(( + dsl::parent_id.eq(Some(instance_id)), + dsl::time_modified.eq(Utc::now()), + )) + .returning(ExternalIp::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_fip), + ) + }) + .and_then(|r| FloatingIp::try_from(r)) + .map_err(|e| Error::internal_error(&format!("{e}"))) + } + + /// Detaches a Floating IP address from an instance.
+ pub async fn floating_ip_detach( + &self, + opctx: &OpContext, + authz_fip: &authz::FloatingIp, + db_fip: &FloatingIp, + ) -> UpdateResult<FloatingIp> { + use db::schema::external_ip::dsl; + + let Some(instance_id) = db_fip.parent_id else { + return Err(Error::invalid_request( + "Floating IP is not attached to an instance", + )); + }; + + let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + .instance_id(instance_id) + .fetch_for(authz::Action::Modify) + .await?; + + opctx.authorize(authz::Action::Modify, authz_fip).await?; + opctx.authorize(authz::Action::Modify, &authz_instance).await?; + + diesel::update(dsl::external_ip) + .filter(dsl::id.eq(db_fip.id())) + .filter(dsl::kind.eq(IpKind::Floating)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.eq(instance_id)) + .set(( + dsl::parent_id.eq(Option::<Uuid>::None), + dsl::time_modified.eq(Utc::now()), + )) + .returning(ExternalIp::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_fip), + ) + }) + .and_then(|r| FloatingIp::try_from(r)) + .map_err(|e| Error::internal_error(&format!("{e}"))) + } } diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 2e7f9da5b7..2844285f40 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -1661,6 +1661,7 @@ mod test { time_deleted: None, ip_pool_id: Uuid::new_v4(), ip_pool_range_id: Uuid::new_v4(), + project_id: None, is_service: false, parent_id: Some(instance_id), kind: IpKind::Ephemeral, @@ -1721,6 +1722,7 @@ mod test { time_deleted: None, ip_pool_id: Uuid::new_v4(), ip_pool_range_id: Uuid::new_v4(), + project_id: None, is_service: false, parent_id: Some(Uuid::new_v4()), kind: IpKind::SNat, @@ -1767,6 +1769,7 @@ mod test { use crate::db::model::IpKind; use crate::db::schema::external_ip::dsl; use diesel::result::DatabaseErrorKind::CheckViolation; + use diesel::result::DatabaseErrorKind::UniqueViolation; use diesel::result::Error::DatabaseError; let logctx = dev::test_setup_log("test_external_ip_check_constraints"); @@ -1791,6 +1794,7 @@ mod test { time_deleted: None, ip_pool_id: Uuid::new_v4(), ip_pool_range_id: Uuid::new_v4(), + project_id: None, is_service: false, parent_id: Some(Uuid::new_v4()), kind: IpKind::Floating, @@ -1803,151 +1807,190 @@ mod test { // - name // - description // - parent (instance / service) UUID - let names = [ - None, - Some(db::model::Name(Name::try_from("foo".to_string()).unwrap())), - ]; + // - project UUID + let names = [None, Some("foo")]; let descriptions = [None, Some("foo".to_string())]; let parent_ids = [None, Some(Uuid::new_v4())]; + let project_ids = [None, Some(Uuid::new_v4())]; + + let mut seen_pairs = HashSet::new(); // For Floating IPs, both name and description must be non-NULL - for name in names.iter() { - for description in descriptions.iter() { - for parent_id in parent_ids.iter() { - for is_service in [false, true] { - let new_ip = ExternalIp { - id: Uuid::new_v4(), - name: name.clone(), - description: description.clone(), - ip: addresses.next().unwrap().into(), - is_service, - parent_id: *parent_id, - ..ip - }; - let res = diesel::insert_into(dsl::external_ip) - .values(new_ip) - .execute_async(&*conn) - .await; - if name.is_some() && description.is_some() { - // Name/description must be non-NULL, instance ID can be - // either - res.unwrap_or_else(|_| { - panic!( - "Failed to insert Floating IP with
valid \ - name, description, and {} ID", - if is_service { - "Service" - } else { - "Instance" - } - ) - }); - } else { - // At least one is not valid, we expect a check violation - let err = res.expect_err( - "Expected a CHECK violation when inserting a \ - Floating IP record with NULL name and/or description", - ); - assert!( - matches!( - err, - DatabaseError( - CheckViolation, - _ - ) - ), - "Expected a CHECK violation when inserting a \ - Floating IP record with NULL name and/or description", - ); - } - } - } + // If they are instance FIPs, they *must* have a project id. + for ( + name, + description, + parent_id, + is_service, + project_id, + modify_name, + ) in itertools::iproduct!( + &names, + &descriptions, + &parent_ids, + [false, true], + &project_ids, + [false, true] + ) { + // Both choices of parent_id are valid, so we need a unique name for each. + let name_local = name.map(|v| { + let name = if modify_name { + v.to_string() + } else { + format!("{v}-with-parent") + }; + db::model::Name(Name::try_from(name).unwrap()) + }); + + // We do name duplicate checking on the `Some` branch, don't steal the + // name intended for another floating IP. + if parent_id.is_none() && modify_name { + continue; + } + + let new_ip = ExternalIp { + id: Uuid::new_v4(), + name: name_local.clone(), + description: description.clone(), + ip: addresses.next().unwrap().into(), + is_service, + parent_id: *parent_id, + project_id: *project_id, + ..ip + }; + + let key = (*project_id, name_local); + + let res = diesel::insert_into(dsl::external_ip) + .values(new_ip) + .execute_async(&*conn) + .await; + + let project_as_expected = (is_service && project_id.is_none()) + || (!is_service && project_id.is_some()); + + let valid_expression = + name.is_some() && description.is_some() && project_as_expected; + let name_exists = seen_pairs.contains(&key); + + if valid_expression && !name_exists { + // Name/description must be non-NULL, instance ID can be + // either + // Names must be unique at fleet level and at project level. + // Project must be NULL if service, non-NULL if instance. + res.unwrap_or_else(|e| { + panic!( + "Failed to insert Floating IP with valid \ + name, description, project ID, and {} ID:\ + {name:?} {description:?} {project_id:?} {:?}\n{e}", + if is_service { "Service" } else { "Instance" }, + &ip.parent_id + ) + }); + + seen_pairs.insert(key); + } else if !valid_expression { + // Several permutations are invalid and we want to detect them all. + // NOTE: CHECK violation will supersede UNIQUE violation below. 
+ let err = res.expect_err( + "Expected a CHECK violation when inserting a \ + Floating IP record with NULL name and/or description, \ + and incorrect project parent relation", + ); + assert!( + matches!(err, DatabaseError(CheckViolation, _)), + "Expected a CHECK violation when inserting a \ + Floating IP record with NULL name and/or description, \ + and incorrect project parent relation", + ); + } else { + let err = res.expect_err( + "Expected a UNIQUE violation when inserting a \ + Floating IP record with existing (name, project_id)", + ); + assert!( + matches!(err, DatabaseError(UniqueViolation, _)), + "Expected a UNIQUE violation when inserting a \ + Floating IP record with existing (name, project_id)", + ); } } - // For other IP types, both name and description must be NULL - for kind in [IpKind::SNat, IpKind::Ephemeral].into_iter() { - for name in names.iter() { - for description in descriptions.iter() { - for parent_id in parent_ids.iter() { - for is_service in [false, true] { - let new_ip = ExternalIp { - id: Uuid::new_v4(), - name: name.clone(), - description: description.clone(), - kind, - ip: addresses.next().unwrap().into(), - is_service, - parent_id: *parent_id, - ..ip - }; - let res = diesel::insert_into(dsl::external_ip) - .values(new_ip.clone()) - .execute_async(&*conn) - .await; - let ip_type = - if is_service { "Service" } else { "Instance" }; - if name.is_none() - && description.is_none() - && parent_id.is_some() - { - // Name/description must be NULL, instance ID cannot - // be NULL. - - if kind == IpKind::Ephemeral && is_service { - // Ephemeral Service IPs aren't supported. - let err = res.unwrap_err(); - assert!( - matches!( - err, - DatabaseError( - CheckViolation, - _ - ) - ), - "Expected a CHECK violation when inserting an \ - Ephemeral Service IP", - ); - } else { - assert!( - res.is_ok(), - "Failed to insert {:?} IP with valid \ - name, description, and {} ID", - kind, - ip_type, - ); - } - } else { - // One is not valid, we expect a check violation - assert!( - res.is_err(), - "Expected a CHECK violation when inserting a \ - {:?} IP record with non-NULL name, description, \ - and/or {} ID", - kind, - ip_type, - ); - let err = res.unwrap_err(); - assert!( - matches!( - err, - DatabaseError( - CheckViolation, - _ - ) - ), - "Expected a CHECK violation when inserting a \ - {:?} IP record with non-NULL name, description, \ - and/or {} ID", - kind, - ip_type, - ); - } - } - } + // For other IP types: name, description and project must be NULL + for (kind, name, description, parent_id, is_service, project_id) in itertools::iproduct!( + [IpKind::SNat, IpKind::Ephemeral], + &names, + &descriptions, + &parent_ids, + [false, true], + &project_ids + ) { + let name_local = name.map(|v| { + db::model::Name(Name::try_from(v.to_string()).unwrap()) + }); + let new_ip = ExternalIp { + id: Uuid::new_v4(), + name: name_local, + description: description.clone(), + kind, + ip: addresses.next().unwrap().into(), + is_service, + parent_id: *parent_id, + project_id: *project_id, + ..ip + }; + let res = diesel::insert_into(dsl::external_ip) + .values(new_ip.clone()) + .execute_async(&*conn) + .await; + let ip_type = if is_service { "Service" } else { "Instance" }; + if name.is_none() + && description.is_none() + && parent_id.is_some() + && project_id.is_none() + { + // Name/description must be NULL, instance ID cannot + // be NULL. + + if kind == IpKind::Ephemeral && is_service { + // Ephemeral Service IPs aren't supported. 
+ let err = res.unwrap_err(); + assert!( + matches!(err, DatabaseError(CheckViolation, _)), + "Expected a CHECK violation when inserting an \ + Ephemeral Service IP", + ); + } else { + assert!( + res.is_ok(), + "Failed to insert {:?} IP with valid \ + name, description, and {} ID", + kind, + ip_type, + ); } + } else { + // One is not valid, we expect a check violation + assert!( + res.is_err(), + "Expected a CHECK violation when inserting a \ + {:?} IP record with non-NULL name, description, \ + and/or {} ID", + kind, + ip_type, + ); + let err = res.unwrap_err(); + assert!( + matches!(err, DatabaseError(CheckViolation, _)), + "Expected a CHECK violation when inserting a \ + {:?} IP record with non-NULL name, description, \ + and/or {} ID", + kind, + ip_type, + ); } } + db.cleanup().await.unwrap(); logctx.cleanup_successful(); } diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index 72a32f562c..028694dc4b 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -231,6 +231,11 @@ impl<'a> LookupPath<'a> { RouterRoute::PrimaryKey(Root { lookup_root: self }, id) } + /// Select a resource of type FloatingIp, identified by its id + pub fn floating_ip_id(self, id: Uuid) -> FloatingIp<'a> { + FloatingIp::PrimaryKey(Root { lookup_root: self }, id) + } + // Fleet-level resources /// Select a resource of type ConsoleSession, identified by its `token` @@ -632,7 +637,7 @@ lookup_resource! { lookup_resource! { name = "Project", ancestors = [ "Silo" ], - children = [ "Disk", "Instance", "Vpc", "Snapshot", "ProjectImage" ], + children = [ "Disk", "Instance", "Vpc", "Snapshot", "ProjectImage", "FloatingIp" ], lookup_by_name = true, soft_deletes = true, primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] @@ -728,6 +733,15 @@ lookup_resource! { primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] } +lookup_resource! { + name = "FloatingIp", + ancestors = [ "Silo", "Project" ], + children = [], + lookup_by_name = true, + soft_deletes = true, + primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] +} + // Miscellaneous resources nested directly below "Fleet" lookup_resource! { diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index cf182e080d..4e5f59e79c 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -98,7 +98,8 @@ const MAX_PORT: u16 = u16::MAX; /// <kind> AS kind, /// candidate_ip AS ip, /// CAST(candidate_first_port AS INT4) AS first_port, -/// CAST(candidate_last_port AS INT4) AS last_port +/// CAST(candidate_last_port AS INT4) AS last_port, +/// <project_id> AS project_id /// FROM /// SELECT * FROM ( /// -- Select all IP addresses by pool and range.
@@ -371,6 +372,13 @@ impl NextExternalIp { out.push_identifier(dsl::first_port::NAME)?; out.push_sql(", CAST(candidate_last_port AS INT4) AS "); out.push_identifier(dsl::last_port::NAME)?; + out.push_sql(", "); + + // Project ID, possibly null + out.push_bind_param::<sql_types::Nullable<sql_types::Uuid>, Option<Uuid>>(self.ip.project_id())?; + out.push_sql(" AS "); + out.push_identifier(dsl::project_id::NAME)?; + out.push_sql(" FROM ("); self.push_address_sequence_subquery(out.reborrow())?; out.push_sql(") CROSS JOIN ("); diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 963f00f7e8..54fb6481a9 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -376,6 +376,20 @@ resource: ProjectImage "silo1-proj1-image1" silo1-proj1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: FloatingIp "silo1-proj1-fip1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-proj1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-proj1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + resource: Project "silo1-proj2" USER Q R LC RP M MP CC D @@ -488,6 +502,20 @@ resource: ProjectImage "silo1-proj2-image1" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: FloatingIp "silo1-proj2-fip1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + resource: Silo "silo2" USER Q R LC RP M MP CC D @@ -768,6 +796,20 @@ resource: ProjectImage "silo2-proj1-image1" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: FloatingIp "silo2-proj1-fip1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + resource: Rack id "c037e882-8b6d-c8b5-bef4-97e848eb0a50" USER Q R LC RP M MP CC D diff --git a/nexus/src/app/external_ip.rs b/nexus/src/app/external_ip.rs index 2354e97085..404f597288 100644 --- a/nexus/src/app/external_ip.rs +++ b/nexus/src/app/external_ip.rs @@ -5,11 +5,20 @@ //!
External IP addresses for instances use crate::external_api::views::ExternalIp; +use crate::external_api::views::FloatingIp; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::lookup; +use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::IpKind; +use nexus_types::external_api::params; +use omicron_common::api::external::http_pagination::PaginatedBy; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupResult; +use omicron_common::api::external::NameOrId; impl super::Nexus { pub(crate) async fn instance_list_external_ips( @@ -33,4 +42,82 @@ impl super::Nexus { }) .collect::<Vec<_>>()) } + + pub(crate) fn floating_ip_lookup<'a>( + &'a self, + opctx: &'a OpContext, + fip_selector: params::FloatingIpSelector, + ) -> LookupResult<lookup::FloatingIp<'a>> { + match fip_selector { + params::FloatingIpSelector { floating_ip: NameOrId::Id(id), project: None } => { + let floating_ip = + LookupPath::new(opctx, &self.db_datastore).floating_ip_id(id); + Ok(floating_ip) + } + params::FloatingIpSelector { + floating_ip: NameOrId::Name(name), + project: Some(project), + } => { + let floating_ip = self + .project_lookup(opctx, params::ProjectSelector { project })? + .floating_ip_name_owned(name.into()); + Ok(floating_ip) + } + params::FloatingIpSelector { + floating_ip: NameOrId::Id(_), + .. + } => Err(Error::invalid_request( + "when providing Floating IP as an ID project should not be specified", + )), + _ => Err(Error::invalid_request( + "Floating IP should either be UUID or project should be specified", + )), + } + } + + pub(crate) async fn floating_ips_list( + &self, + opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec<FloatingIp> { + let (.., authz_project) = + project_lookup.lookup_for(authz::Action::ListChildren).await?; + + Ok(self + .db_datastore + .floating_ips_list(opctx, &authz_project, pagparams) + .await? + .into_iter() + .map(Into::into) + .collect()) + } + + pub(crate) async fn floating_ip_create( + &self, + opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + params: params::FloatingIpCreate, + ) -> CreateResult<FloatingIp> { + let (.., authz_project) = + project_lookup.lookup_for(authz::Action::CreateChild).await?; + + Ok(self + .db_datastore + .allocate_floating_ip(opctx, authz_project.id(), params) + .await? + .try_into() + .unwrap()) + } + + pub(crate) async fn floating_ip_delete( + &self, + opctx: &OpContext, + ip_lookup: lookup::FloatingIp<'_>, + ) -> DeleteResult { + let (.., authz_fip, db_fip) = + ip_lookup.fetch_for(authz::Action::Delete).await?; + + self.db_datastore.floating_ip_delete(opctx, &authz_fip, &db_fip).await + } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 923bb1777e..0edb2c5ea7 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -5,6 +5,7 @@ //!
Virtual Machine Instances use super::MAX_DISKS_PER_INSTANCE; +use super::MAX_EPHEMERAL_IPS_PER_INSTANCE; use super::MAX_EXTERNAL_IPS_PER_INSTANCE; use super::MAX_MEMORY_BYTES_PER_INSTANCE; use super::MAX_NICS_PER_INSTANCE; @@ -52,6 +53,7 @@ use sled_agent_client::types::InstanceProperties; use sled_agent_client::types::InstancePutMigrationIdsBody; use sled_agent_client::types::InstancePutStateBody; use sled_agent_client::types::SourceNatConfig; +use std::matches; use std::net::SocketAddr; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; @@ -168,6 +170,18 @@ impl super::Nexus { MAX_EXTERNAL_IPS_PER_INSTANCE, ))); } + if params + .external_ips + .iter() + .filter(|v| matches!(v, params::ExternalIpCreate::Ephemeral { .. })) + .count() + > MAX_EPHEMERAL_IPS_PER_INSTANCE + { + return Err(Error::invalid_request(&format!( + "An instance may not have more than {} ephemeral IP address", + MAX_EPHEMERAL_IPS_PER_INSTANCE, + ))); + } if let params::InstanceNetworkInterfaceAttachment::Create(ref ifaces) = params.network_interfaces { @@ -885,8 +899,6 @@ impl super::Nexus { .await?; // Collect the external IPs for the instance. - // TODO-correctness: Handle Floating IPs, see - // https://github.com/oxidecomputer/omicron/issues/1334 let (snat_ip, external_ips): (Vec<_>, Vec<_>) = self .db_datastore .instance_lookup_external_ips(&opctx, authz_instance.id()) @@ -895,8 +907,6 @@ impl super::Nexus { .partition(|ip| ip.kind == IpKind::SNat); // Sanity checks on the number and kind of each IP address. - // TODO-correctness: Handle multiple IP addresses, see - // https://github.com/oxidecomputer/omicron/issues/1467 if external_ips.len() > MAX_EXTERNAL_IPS_PER_INSTANCE { return Err(Error::internal_error( format!( @@ -908,8 +918,28 @@ impl super::Nexus { .as_str(), )); } - let external_ips = - external_ips.into_iter().map(|model| model.ip.ip()).collect(); + + // Partition remaining external IPs by class: we can have at most + // one ephemeral ip. + let (ephemeral_ips, floating_ips): (Vec<_>, Vec<_>) = external_ips + .into_iter() + .partition(|ip| ip.kind == IpKind::Ephemeral); + + if ephemeral_ips.len() > MAX_EPHEMERAL_IPS_PER_INSTANCE { + return Err(Error::internal_error( + format!( + "Expected at most {} ephemeral IP for an instance, found {}", + MAX_EPHEMERAL_IPS_PER_INSTANCE, + ephemeral_ips.len() + ) + .as_str(), + )); + } + + let ephemeral_ip = ephemeral_ips.get(0).map(|model| model.ip.ip()); + + let floating_ips = + floating_ips.into_iter().map(|model| model.ip.ip()).collect(); if snat_ip.len() != 1 { return Err(Error::internal_error( "Expected exactly one SNAT IP address for an instance", @@ -985,7 +1015,8 @@ impl super::Nexus { }, nics, source_nat, - external_ips, + ephemeral_ip, + floating_ips, firewall_rules, dhcp_config: sled_agent_client::types::DhcpConfig { dns_servers: self.external_dns_servers.clone(), diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 18c9dae841..d4c2d596f8 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -79,8 +79,13 @@ pub(crate) use nexus_db_queries::db::queries::disk::MAX_DISKS_PER_INSTANCE; pub(crate) const MAX_NICS_PER_INSTANCE: usize = 8; -// TODO-completeness: Support multiple external IPs -pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = 1; +// XXX: Might want to recast as max *floating* IPs, we have at most one +// ephemeral (so bounded in saga by design). +// The value here is arbitrary, but we need *a* limit for the instance +// create saga to have a bounded DAG. 
We might want to only enforce +// this during instance create (rather than live attach) in future. +pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = 32; +pub(crate) const MAX_EPHEMERAL_IPS_PER_INSTANCE: usize = 1; pub const MAX_VCPU_PER_INSTANCE: u16 = 64; diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 8c2f96c36c..5149825842 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -598,35 +598,55 @@ async fn sic_allocate_instance_snat_ip_undo( async fn sic_allocate_instance_external_ip( sagactx: NexusActionContext, ) -> Result<(), ActionError> { + // XXX: may wish to restructure partially: we have at most one ephemeral + // and then at most $n$ floating. let osagactx = sagactx.user_data(); let datastore = osagactx.datastore(); let repeat_saga_params = sagactx.saga_params::<NetParams>()?; let saga_params = repeat_saga_params.saga_params; let ip_index = repeat_saga_params.which; - let ip_params = saga_params.create_params.external_ips.get(ip_index); - let ip_params = match ip_params { - None => { - return Ok(()); - } - Some(ref prs) => prs, + let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) + else { + return Ok(()); }; let opctx = crate::context::op_context_for_saga_action( &sagactx, &saga_params.serialized_authn, ); let instance_id = repeat_saga_params.instance_id; - let ip_id = repeat_saga_params.new_id; - // Collect the possible pool name for this IP address - let pool_name = match ip_params { + match ip_params { + // Allocate a new IP address from the target, possibly default, pool params::ExternalIpCreate::Ephemeral { ref pool_name } => { - pool_name.as_ref().map(|name| db::model::Name(name.clone())) + let pool_name = + pool_name.as_ref().map(|name| db::model::Name(name.clone())); + let ip_id = repeat_saga_params.new_id; + datastore + .allocate_instance_ephemeral_ip( + &opctx, + ip_id, + instance_id, + pool_name, + ) + .await + .map_err(ActionError::action_failed)?; } - }; - datastore - .allocate_instance_ephemeral_ip(&opctx, ip_id, instance_id, pool_name) - .await - .map_err(ActionError::action_failed)?; + // Set the parent of an existing floating IP to the new instance's ID. + params::ExternalIpCreate::Floating { ref floating_ip_name } => { + let floating_ip_name = db::model::Name(floating_ip_name.clone()); + let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) + .project_id(saga_params.project_id) + .floating_ip_name(&floating_ip_name) + .fetch_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + datastore + .floating_ip_attach(&opctx, &authz_fip, &db_fip, instance_id) + .await + .map_err(ActionError::action_failed)?; + } + } Ok(()) } @@ -638,16 +658,31 @@ async fn sic_allocate_instance_external_ip_undo( let repeat_saga_params = sagactx.saga_params::<NetParams>()?; let saga_params = repeat_saga_params.saga_params; let ip_index = repeat_saga_params.which; - if ip_index >= saga_params.create_params.external_ips.len() { - return Ok(()); - } - let opctx = crate::context::op_context_for_saga_action( &sagactx, &saga_params.serialized_authn, ); - let ip_id = repeat_saga_params.new_id; - datastore.deallocate_external_ip(&opctx, ip_id).await?; + let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) + else { + return Ok(()); + }; + + match ip_params { + params::ExternalIpCreate::Ephemeral { ..
} => { + let ip_id = repeat_saga_params.new_id; + datastore.deallocate_external_ip(&opctx, ip_id).await?; + } + params::ExternalIpCreate::Floating { floating_ip_name } => { + let floating_ip_name = db::model::Name(floating_ip_name.clone()); + let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) + .project_id(saga_params.project_id) + .floating_ip_name(&floating_ip_name) + .fetch_for(authz::Action::Modify) + .await?; + + datastore.floating_ip_detach(&opctx, &authz_fip, &db_fip).await?; + } + } Ok(()) } diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index e35b922c87..7802312b10 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -158,6 +158,11 @@ async fn sid_deallocate_external_ip( ) .await .map_err(ActionError::action_failed)?; + osagactx + .datastore() + .detach_floating_ips_by_instance_id(&opctx, params.authz_instance.id()) + .await + .map_err(ActionError::action_failed)?; Ok(()) } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index ef8d73afab..a113451fc7 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -140,6 +140,11 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(ip_pool_service_range_add)?; api.register(ip_pool_service_range_remove)?; + api.register(floating_ip_list)?; + api.register(floating_ip_create)?; + api.register(floating_ip_view)?; + api.register(floating_ip_delete)?; + api.register(disk_list)?; api.register(disk_create)?; api.register(disk_view)?; @@ -1521,6 +1526,126 @@ async fn ip_pool_service_range_remove( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +// Floating IP Addresses + +/// List all Floating IPs +#[endpoint { + method = GET, + path = "/v1/floating-ips", + tags = ["floating-ips"], +}] +async fn floating_ip_list( + rqctx: RequestContext<Arc<ServerContext>>, + query_params: Query<PaginatedByNameOrId<params::ProjectSelector>>, +) -> Result<HttpResponseOk<ResultsPage<views::FloatingIp>>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let project_lookup = + nexus.project_lookup(&opctx, scan_params.selector.clone())?; + let ips = nexus + .floating_ips_list(&opctx, &project_lookup, &paginated_by) + .await?; + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + ips, + &marker_for_name_or_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Create a Floating IP +#[endpoint { + method = POST, + path = "/v1/floating-ips", + tags = ["floating-ips"], +}] +async fn floating_ip_create( + rqctx: RequestContext<Arc<ServerContext>>, + query_params: Query<params::ProjectSelector>, + floating_params: TypedBody<params::FloatingIpCreate>, +) -> Result<HttpResponseCreated<views::FloatingIp>, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.nexus; + let floating_params = floating_params.into_inner(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let project_lookup = + nexus.project_lookup(&opctx, query_params.into_inner())?; + let ip = nexus + .floating_ip_create(&opctx, &project_lookup, floating_params) + .await?; + Ok(HttpResponseCreated(ip)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Delete a Floating
IP +#[endpoint { + method = DELETE, + path = "/v1/floating-ips/{floating_ip}", + tags = ["floating-ips"], +}] +async fn floating_ip_delete( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let floating_ip_selector = params::FloatingIpSelector { + floating_ip: path.floating_ip, + project: query.project, + }; + let fip_lookup = + nexus.floating_ip_lookup(&opctx, floating_ip_selector)?; + + nexus.floating_ip_delete(&opctx, fip_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Fetch a floating IP +#[endpoint { + method = GET, + path = "/v1/floating-ips/{floating_ip}", + tags = ["floating-ips"] +}] +async fn floating_ip_view( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let floating_ip_selector = params::FloatingIpSelector { + floating_ip: path.floating_ip, + project: query.project, + }; + let (.., fip) = nexus + .floating_ip_lookup(&opctx, floating_ip_selector)? + .fetch() + .await?; + Ok(HttpResponseOk(fip.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Disks /// List disks diff --git a/nexus/src/external_api/tag-config.json b/nexus/src/external_api/tag-config.json index 07eb198016..3bc8006cee 100644 --- a/nexus/src/external_api/tag-config.json +++ b/nexus/src/external_api/tag-config.json @@ -8,6 +8,12 @@ "url": "http://docs.oxide.computer/api/disks" } }, + "floating-ips": { + "description": "Floating IPs allow a project to allocate well-known IPs to instances.", + "external_docs": { + "url": "http://docs.oxide.computer/api/floating-ips" + } + }, "hidden": { "description": "TODO operations that will not ship to customers", "external_docs": { diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 2368c3f568..1848989bf9 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -21,6 +21,7 @@ use nexus_types::external_api::shared::IdentityType; use nexus_types::external_api::shared::IpRange; use nexus_types::external_api::views; use nexus_types::external_api::views::Certificate; +use nexus_types::external_api::views::FloatingIp; use nexus_types::external_api::views::IpPool; use nexus_types::external_api::views::IpPoolRange; use nexus_types::external_api::views::User; @@ -32,7 +33,9 @@ use omicron_common::api::external::Disk; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Instance; use omicron_common::api::external::InstanceCpuCount; +use omicron_common::api::external::NameOrId; use omicron_sled_agent::sim::SledAgent; +use std::net::IpAddr; use std::sync::Arc; use uuid::Uuid; @@ -149,6 +152,28 @@ pub async fn create_ip_pool( (pool, range) } +pub async fn create_floating_ip( + client: &ClientTestContext, + fip_name: &str, + project: &str, + address: Option, + parent_pool_name: Option<&str>, +) -> FloatingIp { + object_create( + client, + 
&format!("/v1/floating-ips?project={project}"),
+        &params::FloatingIpCreate {
+            identity: IdentityMetadataCreateParams {
+                name: fip_name.parse().unwrap(),
+                description: String::from("a floating ip"),
+            },
+            address,
+            pool: parent_pool_name.map(|v| NameOrId::Name(v.parse().unwrap())),
+        },
+    )
+    .await
+}
+
 pub async fn create_certificate(
     client: &ClientTestContext,
     cert_name: &str,
diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs
index 536b96f7ae..db803bfde0 100644
--- a/nexus/tests/integration_tests/endpoints.rs
+++ b/nexus/tests/integration_tests/endpoints.rs
@@ -134,6 +134,7 @@ lazy_static! {
     pub static ref DEMO_PROJECT_URL_INSTANCES: String = format!("/v1/instances?project={}", *DEMO_PROJECT_NAME);
     pub static ref DEMO_PROJECT_URL_SNAPSHOTS: String = format!("/v1/snapshots?project={}", *DEMO_PROJECT_NAME);
     pub static ref DEMO_PROJECT_URL_VPCS: String = format!("/v1/vpcs?project={}", *DEMO_PROJECT_NAME);
+    pub static ref DEMO_PROJECT_URL_FIPS: String = format!("/v1/floating-ips?project={}", *DEMO_PROJECT_NAME);
     pub static ref DEMO_PROJECT_CREATE: params::ProjectCreate =
         params::ProjectCreate {
             identity: IdentityMetadataCreateParams {
@@ -573,6 +574,22 @@ lazy_static! {
     };
 }
 
+lazy_static! {
+    // Project Floating IPs
+    pub static ref DEMO_FLOAT_IP_NAME: Name = "float-ip".parse().unwrap();
+    pub static ref DEMO_FLOAT_IP_URL: String =
+        format!("/v1/floating-ips/{}?project={}", *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_NAME);
+    pub static ref DEMO_FLOAT_IP_CREATE: params::FloatingIpCreate =
+        params::FloatingIpCreate {
+            identity: IdentityMetadataCreateParams {
+                name: DEMO_FLOAT_IP_NAME.clone(),
+                description: String::from("a new floating IP"),
+            },
+            address: Some(std::net::Ipv4Addr::new(10, 0, 0, 141).into()),
+            pool: None,
+        };
+}
+
 lazy_static! {
     // Identity providers
     pub static ref IDENTITY_PROVIDERS_URL: String = format!("/v1/system/identity-providers?silo=demo-silo");
@@ -1991,6 +2008,29 @@ lazy_static! {
             allowed_methods: vec![
                 AllowedMethod::GetNonexistent,
             ],
+        },
+
+        // Floating IPs
+        VerifyEndpoint {
+            url: &DEMO_PROJECT_URL_FIPS,
+            visibility: Visibility::Protected,
+            unprivileged_access: UnprivilegedAccess::None,
+            allowed_methods: vec![
+                AllowedMethod::Post(
+                    serde_json::to_value(&*DEMO_FLOAT_IP_CREATE).unwrap(),
+                ),
+                AllowedMethod::Get,
+            ],
+        },
+
+        VerifyEndpoint {
+            url: &DEMO_FLOAT_IP_URL,
+            visibility: Visibility::Protected,
+            unprivileged_access: UnprivilegedAccess::None,
+            allowed_methods: vec![
+                AllowedMethod::Get,
+                AllowedMethod::Delete,
+            ],
+        }
     ];
 }
diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs
new file mode 100644
index 0000000000..f3161dea72
--- /dev/null
+++ b/nexus/tests/integration_tests/external_ips.rs
@@ -0,0 +1,432 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//!
Tests Floating IP support in the API + +use std::net::IpAddr; +use std::net::Ipv4Addr; + +use crate::integration_tests::instances::instance_simulate; +use dropshot::test_util::ClientTestContext; +use dropshot::HttpErrorResponseBody; +use http::Method; +use http::StatusCode; +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_floating_ip; +use nexus_test_utils::resource_helpers::create_instance_with; +use nexus_test_utils::resource_helpers::create_ip_pool; +use nexus_test_utils::resource_helpers::create_project; +use nexus_test_utils::resource_helpers::populate_ip_pool; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params; +use nexus_types::external_api::views::FloatingIp; +use omicron_common::address::IpRange; +use omicron_common::address::Ipv4Range; +use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_common::api::external::Instance; +use uuid::Uuid; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +const PROJECT_NAME: &str = "rootbeer-float"; + +const FIP_NAMES: &[&str] = &["vanilla", "chocolate", "strawberry", "pistachio"]; + +pub fn get_floating_ips_url(project_name: &str) -> String { + format!("/v1/floating-ips?project={project_name}") +} + +pub fn get_floating_ip_by_name_url( + fip_name: &str, + project_name: &str, +) -> String { + format!("/v1/floating-ips/{fip_name}?project={project_name}") +} + +pub fn get_floating_ip_by_id_url(fip_id: &Uuid) -> String { + format!("/v1/floating-ips/{fip_id}") +} + +#[nexus_test] +async fn test_floating_ip_access(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + populate_ip_pool(&client, "default", None).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create a floating IP from the default pool. + let fip_name = FIP_NAMES[0]; + let fip = create_floating_ip( + client, + fip_name, + &project.identity.id.to_string(), + None, + None, + ) + .await; + + // Fetch floating IP by ID + let fetched_fip = + floating_ip_get(&client, &get_floating_ip_by_id_url(&fip.identity.id)) + .await; + assert_eq!(fetched_fip.identity.id, fip.identity.id); + + // Fetch floating IP by name and project_id + let fetched_fip = floating_ip_get( + &client, + &get_floating_ip_by_name_url( + fip.identity.name.as_str(), + &project.identity.id.to_string(), + ), + ) + .await; + assert_eq!(fetched_fip.identity.id, fip.identity.id); + + // Fetch floating IP by name and project_name + let fetched_fip = floating_ip_get( + &client, + &get_floating_ip_by_name_url( + fip.identity.name.as_str(), + project.identity.name.as_str(), + ), + ) + .await; + assert_eq!(fetched_fip.identity.id, fip.identity.id); +} + +#[nexus_test] +async fn test_floating_ip_create(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + populate_ip_pool(&client, "default", None).await; + let other_pool_range = IpRange::V4( + Ipv4Range::new(Ipv4Addr::new(10, 1, 0, 1), Ipv4Addr::new(10, 1, 0, 5)) + .unwrap(), + ); + create_ip_pool(&client, "other-pool", Some(other_pool_range)).await; + + let project = create_project(client, PROJECT_NAME).await; + + // Create with no chosen IP and fallback to default pool. 
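+    // (No address or pool is given below; the default pool seeded by
+    // `populate_ip_pool` is assumed to start at 10.0.0.0, which is why the
+    // assertion expects that exact address for the first allocation.)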
+    let fip_name = FIP_NAMES[0];
+    let fip = create_floating_ip(
+        client,
+        fip_name,
+        project.identity.name.as_str(),
+        None,
+        None,
+    )
+    .await;
+    assert_eq!(fip.identity.name.as_str(), fip_name);
+    assert_eq!(fip.project_id, project.identity.id);
+    assert_eq!(fip.instance_id, None);
+    assert_eq!(fip.ip, IpAddr::from(Ipv4Addr::new(10, 0, 0, 0)));
+
+    // Create with chosen IP and fallback to default pool.
+    let fip_name = FIP_NAMES[1];
+    let ip_addr = "10.0.12.34".parse().unwrap();
+    let fip = create_floating_ip(
+        client,
+        fip_name,
+        project.identity.name.as_str(),
+        Some(ip_addr),
+        None,
+    )
+    .await;
+    assert_eq!(fip.identity.name.as_str(), fip_name);
+    assert_eq!(fip.project_id, project.identity.id);
+    assert_eq!(fip.instance_id, None);
+    assert_eq!(fip.ip, ip_addr);
+
+    // Create with no chosen IP from named pool.
+    let fip_name = FIP_NAMES[2];
+    let fip = create_floating_ip(
+        client,
+        fip_name,
+        project.identity.name.as_str(),
+        None,
+        Some("other-pool"),
+    )
+    .await;
+    assert_eq!(fip.identity.name.as_str(), fip_name);
+    assert_eq!(fip.project_id, project.identity.id);
+    assert_eq!(fip.instance_id, None);
+    assert_eq!(fip.ip, IpAddr::from(Ipv4Addr::new(10, 1, 0, 1)));
+
+    // Create with chosen IP from named pool.
+    let fip_name = FIP_NAMES[3];
+    let ip_addr = "10.1.0.5".parse().unwrap();
+    let fip = create_floating_ip(
+        client,
+        fip_name,
+        project.identity.name.as_str(),
+        Some(ip_addr),
+        Some("other-pool"),
+    )
+    .await;
+    assert_eq!(fip.identity.name.as_str(), fip_name);
+    assert_eq!(fip.project_id, project.identity.id);
+    assert_eq!(fip.instance_id, None);
+    assert_eq!(fip.ip, ip_addr);
+}
+
+#[nexus_test]
+async fn test_floating_ip_create_ip_in_use(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+
+    populate_ip_pool(&client, "default", None).await;
+
+    let project = create_project(client, PROJECT_NAME).await;
+    let contested_ip = "10.0.0.0".parse().unwrap();
+
+    // First create will succeed.
+    create_floating_ip(
+        client,
+        FIP_NAMES[0],
+        project.identity.name.as_str(),
+        Some(contested_ip),
+        None,
+    )
+    .await;
+
+    // Second will fail as the requested IP is in use in the selected
+    // (default) pool.
+    let error: HttpErrorResponseBody = NexusRequest::new(
+        RequestBuilder::new(
+            client,
+            Method::POST,
+            &get_floating_ips_url(PROJECT_NAME),
+        )
+        .body(Some(&params::FloatingIpCreate {
+            identity: IdentityMetadataCreateParams {
+                name: FIP_NAMES[1].parse().unwrap(),
+                description: "another fip".into(),
+            },
+            address: Some(contested_ip),
+            pool: None,
+        }))
+        .expect_status(Some(StatusCode::BAD_REQUEST)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+    assert_eq!(error.message, "Requested external IP address not available");
+}
+
+#[nexus_test]
+async fn test_floating_ip_create_name_in_use(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+
+    populate_ip_pool(&client, "default", None).await;
+
+    let project = create_project(client, PROJECT_NAME).await;
+    let contested_name = FIP_NAMES[0];
+
+    // First create will succeed.
+    create_floating_ip(
+        client,
+        contested_name,
+        project.identity.name.as_str(),
+        None,
+        None,
+    )
+    .await;
+
+    // Second will fail as the requested name is in use within this
+    // project.
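+    // (Per-project name uniqueness is enforced by the partial unique index
+    // `lookup_floating_ip_by_name_and_project` introduced in this change's
+    // schema migration.)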
+    let error: HttpErrorResponseBody = NexusRequest::new(
+        RequestBuilder::new(
+            client,
+            Method::POST,
+            &get_floating_ips_url(PROJECT_NAME),
+        )
+        .body(Some(&params::FloatingIpCreate {
+            identity: IdentityMetadataCreateParams {
+                name: contested_name.parse().unwrap(),
+                description: "another fip".into(),
+            },
+            address: None,
+            pool: None,
+        }))
+        .expect_status(Some(StatusCode::BAD_REQUEST)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+    assert_eq!(
+        error.message,
+        format!("already exists: floating-ip \"{contested_name}\""),
+    );
+}
+
+#[nexus_test]
+async fn test_floating_ip_delete(cptestctx: &ControlPlaneTestContext) {
+    let client = &cptestctx.external_client;
+
+    populate_ip_pool(&client, "default", None).await;
+    let project = create_project(client, PROJECT_NAME).await;
+
+    let fip = create_floating_ip(
+        client,
+        FIP_NAMES[0],
+        project.identity.name.as_str(),
+        None,
+        None,
+    )
+    .await;
+
+    // Delete the floating IP.
+    NexusRequest::object_delete(
+        client,
+        &get_floating_ip_by_id_url(&fip.identity.id),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap();
+}
+
+#[nexus_test]
+async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) {
+    let client = &cptestctx.external_client;
+    let apictx = &cptestctx.server.apictx();
+    let nexus = &apictx.nexus;
+
+    populate_ip_pool(&client, "default", None).await;
+    let project = create_project(client, PROJECT_NAME).await;
+
+    let fip = create_floating_ip(
+        client,
+        FIP_NAMES[0],
+        project.identity.name.as_str(),
+        None,
+        None,
+    )
+    .await;
+
+    // Bind the floating IP to an instance at create time.
+    let instance_name = "anonymous-diner";
+    let instance = create_instance_with(
+        &client,
+        PROJECT_NAME,
+        instance_name,
+        &params::InstanceNetworkInterfaceAttachment::Default,
+        vec![],
+        vec![params::ExternalIpCreate::Floating {
+            floating_ip_name: FIP_NAMES[0].parse().unwrap(),
+        }],
+    )
+    .await;
+
+    // Reacquire FIP: parent ID must have updated to match instance.
+    let fetched_fip =
+        floating_ip_get(&client, &get_floating_ip_by_id_url(&fip.identity.id))
+            .await;
+    assert_eq!(fetched_fip.instance_id, Some(instance.identity.id));
+
+    // Try to delete the floating IP, which should fail.
+    let error: HttpErrorResponseBody = NexusRequest::new(
+        RequestBuilder::new(
+            client,
+            Method::DELETE,
+            &get_floating_ip_by_id_url(&fip.identity.id),
+        )
+        .expect_status(Some(StatusCode::BAD_REQUEST)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+    assert_eq!(
+        error.message,
+        format!("Floating IP cannot be deleted while attached to an instance"),
+    );
+
+    // Stop and delete the instance.
+    instance_simulate(nexus, &instance.identity.id).await;
+    instance_simulate(nexus, &instance.identity.id).await;
+
+    let _: Instance = NexusRequest::new(
+        RequestBuilder::new(
+            client,
+            Method::POST,
+            &format!("/v1/instances/{}/stop", instance.identity.id),
+        )
+        .body(None as Option<&serde_json::Value>)
+        .expect_status(Some(StatusCode::ACCEPTED)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+
+    instance_simulate(nexus, &instance.identity.id).await;
+
+    NexusRequest::object_delete(
+        &client,
+        &format!("/v1/instances/{instance_name}?project={PROJECT_NAME}"),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap();
+
+    // Reacquire FIP again: parent ID must now be unset.
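+    // (The instance delete saga change above detaches floating IPs via
+    // `detach_floating_ips_by_instance_id`, so the FIP should outlive the
+    // instance with its parent ID cleared rather than being deallocated.)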
+ let fetched_fip = + floating_ip_get(&client, &get_floating_ip_by_id_url(&fip.identity.id)) + .await; + assert_eq!(fetched_fip.instance_id, None); + + // Delete the floating IP. + NexusRequest::object_delete( + client, + &get_floating_ip_by_id_url(&fip.identity.id), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); +} + +pub async fn floating_ip_get( + client: &ClientTestContext, + fip_url: &str, +) -> FloatingIp { + floating_ip_get_as(client, fip_url, AuthnMode::PrivilegedUser).await +} + +async fn floating_ip_get_as( + client: &ClientTestContext, + fip_url: &str, + authn_as: AuthnMode, +) -> FloatingIp { + NexusRequest::object_get(client, fip_url) + .authn_as(authn_as) + .execute() + .await + .unwrap_or_else(|e| { + panic!("failed to make \"get\" request to {fip_url}: {e}") + }) + .parsed_body() + .unwrap_or_else(|e| { + panic!("failed to make \"get\" request to {fip_url}: {e}") + }) +} diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index ea633be9dc..f54370c32f 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -4,11 +4,14 @@ //! Tests basic instance support in the API +use super::external_ips::floating_ip_get; +use super::external_ips::get_floating_ip_by_id_url; use super::metrics::{get_latest_silo_metric, get_latest_system_metric}; use camino::Utf8Path; use http::method::Method; use http::StatusCode; +use itertools::Itertools; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::fixed_data::silo::SILO_ID; @@ -18,6 +21,7 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::resource_helpers::create_disk; +use nexus_test_utils::resource_helpers::create_floating_ip; use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_local_user; use nexus_test_utils::resource_helpers::create_silo; @@ -54,6 +58,7 @@ use omicron_nexus::TestInterfaces as _; use omicron_sled_agent::sim::SledAgent; use sled_agent_client::TestInterfaces as _; use std::convert::TryFrom; +use std::net::Ipv4Addr; use std::sync::Arc; use uuid::Uuid; @@ -3645,6 +3650,139 @@ async fn test_instance_ephemeral_ip_from_correct_pool( ); } +#[nexus_test] +async fn test_instance_attach_several_external_ips( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + let _ = create_project(&client, PROJECT_NAME).await; + + // Create a single (large) IP pool + let default_pool_range = IpRange::V4( + Ipv4Range::new( + std::net::Ipv4Addr::new(10, 0, 0, 1), + std::net::Ipv4Addr::new(10, 0, 0, 10), + ) + .unwrap(), + ); + populate_ip_pool(&client, "default", Some(default_pool_range)).await; + + // Create several floating IPs for the instance, totalling 8 IPs. 
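+    // (One ephemeral IP plus seven floating IPs; the instance's SNAT
+    // allocation also draws an address from the same pool, as checked below.)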
+    let mut external_ip_create =
+        vec![params::ExternalIpCreate::Ephemeral { pool_name: None }];
+    let mut fips = vec![];
+    for i in 1..8 {
+        let name = format!("fip-{i}");
+        fips.push(
+            create_floating_ip(&client, &name, PROJECT_NAME, None, None).await,
+        );
+        external_ip_create.push(params::ExternalIpCreate::Floating {
+            floating_ip_name: name.parse().unwrap(),
+        });
+    }
+
+    // Create an instance with pool name blank, expect IP from default pool
+    let instance_name = "many-fips";
+    let instance = create_instance_with(
+        &client,
+        PROJECT_NAME,
+        instance_name,
+        &params::InstanceNetworkInterfaceAttachment::Default,
+        vec![],
+        external_ip_create,
+    )
+    .await;
+
+    // Verify that all external IPs are visible on the instance and have
+    // been allocated in order.
+    let external_ips =
+        fetch_instance_external_ips(&client, instance_name).await;
+    assert_eq!(external_ips.len(), 8);
+    eprintln!("{external_ips:?}");
+    for (i, eip) in external_ips
+        .iter()
+        .sorted_unstable_by(|a, b| a.ip.cmp(&b.ip))
+        .enumerate()
+    {
+        let last_octet = i + if i != external_ips.len() - 1 {
+            assert_eq!(eip.kind, IpKind::Floating);
+            1
+        } else {
+            // SNAT will occupy 10.0.0.8 here, since it is alloc'd before
+            // the ephemeral.
+            assert_eq!(eip.kind, IpKind::Ephemeral);
+            2
+        };
+        assert_eq!(eip.ip, Ipv4Addr::new(10, 0, 0, last_octet as u8));
+    }
+
+    // Verify that all floating IPs are bound to their parent instance.
+    for fip in fips {
+        let fetched_fip = floating_ip_get(
+            &client,
+            &get_floating_ip_by_id_url(&fip.identity.id),
+        )
+        .await;
+        assert_eq!(fetched_fip.instance_id, Some(instance.identity.id));
+    }
+}
+
+#[nexus_test]
+async fn test_instance_allow_only_one_ephemeral_ip(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+
+    let _ = create_project(&client, PROJECT_NAME).await;
+
+    // Create one IP pool with space for two ephemerals.
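+    // (Pool capacity is not the constraint under test: the request below is
+    // rejected because an instance may have at most one ephemeral IP.)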
+    let default_pool_range = IpRange::V4(
+        Ipv4Range::new(
+            std::net::Ipv4Addr::new(10, 0, 0, 1),
+            std::net::Ipv4Addr::new(10, 0, 0, 2),
+        )
+        .unwrap(),
+    );
+    populate_ip_pool(&client, "default", Some(default_pool_range)).await;
+
+    let ephemeral_create = params::ExternalIpCreate::Ephemeral {
+        pool_name: Some("default".parse().unwrap()),
+    };
+    let error: HttpErrorResponseBody = NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, &get_instances_url())
+            .body(Some(&params::InstanceCreate {
+                identity: IdentityMetadataCreateParams {
+                    name: "default-pool-inst".parse().unwrap(),
+                    description: "instance default-pool-inst".into(),
+                },
+                ncpus: InstanceCpuCount(4),
+                memory: ByteCount::from_gibibytes_u32(1),
+                hostname: String::from("the_host"),
+                user_data:
+                    b"#cloud-config\nsystem_info:\n default_user:\n name: oxide"
+                        .to_vec(),
+                network_interfaces: params::InstanceNetworkInterfaceAttachment::Default,
+                external_ips: vec![
+                    ephemeral_create.clone(), ephemeral_create
+                ],
+                disks: vec![],
+                start: true,
+            }))
+            .expect_status(Some(StatusCode::BAD_REQUEST)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+    assert_eq!(
+        error.message,
+        "An instance may not have more than 1 ephemeral IP address"
+    );
+}
+
 async fn create_instance_with_pool(
     client: &ClientTestContext,
     instance_name: &str,
@@ -3663,10 +3801,10 @@ async fn create_instance_with_pool(
         .await
 }
 
-async fn fetch_instance_ephemeral_ip(
+async fn fetch_instance_external_ips(
     client: &ClientTestContext,
     instance_name: &str,
-) -> views::ExternalIp {
+) -> Vec<views::ExternalIp> {
     let ips_url = format!(
         "/v1/instances/{}/external-ips?project={}",
         instance_name, PROJECT_NAME
     );
     let ips = NexusRequest::object_get(client, &ips_url)
         .authn_as(AuthnMode::PrivilegedUser)
         .execute()
         .await
         .expect("Failed to fetch external IPs")
         .parsed_body::<ResultsPage<views::ExternalIp>>()
         .expect("Failed to parse external IPs");
-    assert_eq!(ips.items.len(), 1);
-    assert_eq!(ips.items[0].kind, IpKind::Ephemeral);
-    ips.items[0].clone()
+    ips.items
+}
+
+async fn fetch_instance_ephemeral_ip(
+    client: &ClientTestContext,
+    instance_name: &str,
+) -> views::ExternalIp {
+    fetch_instance_external_ips(client, instance_name)
+        .await
+        .into_iter()
+        .find(|v| v.kind == IpKind::Ephemeral)
+        .unwrap()
 }
 
 #[nexus_test]
diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs
index 4d7b41cfa8..53de24c518 100644
--- a/nexus/tests/integration_tests/mod.rs
+++ b/nexus/tests/integration_tests/mod.rs
@@ -12,6 +12,7 @@ mod commands;
 mod console_api;
 mod device_auth;
 mod disks;
+mod external_ips;
 mod host_phase1_updater;
 mod images;
 mod initialization;
diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs
index 9936af20bf..1cb2eaca3a 100644
--- a/nexus/tests/integration_tests/unauthorized.rs
+++ b/nexus/tests/integration_tests/unauthorized.rs
@@ -278,6 +278,12 @@ lazy_static!
{ body: serde_json::to_value(&*DEMO_IMAGE_CREATE).unwrap(), id_routes: vec!["/v1/images/{id}"], }, + // Create a Floating IP in the project + SetupReq::Post { + url: &DEMO_PROJECT_URL_FIPS, + body: serde_json::to_value(&*DEMO_FLOAT_IP_CREATE).unwrap(), + id_routes: vec!["/v1/floating-ips/{id}"], + }, // Create a SAML identity provider SetupReq::Post { url: &SAML_IDENTITY_PROVIDERS_URL, diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 7e57d00df2..b236d73551 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -11,6 +11,13 @@ disk_list GET /v1/disks disk_metrics_list GET /v1/disks/{disk}/metrics/{metric} disk_view GET /v1/disks/{disk} +API operations found with tag "floating-ips" +OPERATION ID METHOD URL PATH +floating_ip_create POST /v1/floating-ips +floating_ip_delete DELETE /v1/floating-ips/{floating_ip} +floating_ip_list GET /v1/floating-ips +floating_ip_view GET /v1/floating-ips/{floating_ip} + API operations found with tag "hidden" OPERATION ID METHOD URL PATH device_access_token POST /device/token diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 3303d38367..e582590aa0 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -54,6 +54,7 @@ path_param!(VpcPath, vpc, "VPC"); path_param!(SubnetPath, subnet, "subnet"); path_param!(RouterPath, router, "router"); path_param!(RoutePath, route, "route"); +path_param!(FloatingIpPath, floating_ip, "Floating IP"); path_param!(DiskPath, disk, "disk"); path_param!(SnapshotPath, snapshot, "snapshot"); path_param!(ImagePath, image, "image"); @@ -146,6 +147,14 @@ pub struct OptionalProjectSelector { pub project: Option, } +#[derive(Deserialize, JsonSchema)] +pub struct FloatingIpSelector { + /// Name or ID of the project, only required if `floating_ip` is provided as a `Name` + pub project: Option, + /// Name or ID of the Floating IP + pub floating_ip: NameOrId, +} + #[derive(Deserialize, JsonSchema)] pub struct DiskSelector { /// Name or ID of the project, only required if `disk` is provided as a `Name` @@ -768,6 +777,23 @@ pub struct IpPoolUpdate { pub identity: IdentityMetadataUpdateParams, } +// Floating IPs +/// Parameters for creating a new floating IP address for instances. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct FloatingIpCreate { + #[serde(flatten)] + pub identity: IdentityMetadataCreateParams, + + /// An IP address to reserve for use as a floating IP. This field is + /// optional: when not set, an address will be automatically chosen from + /// `pool`. If set, then the IP must be available in the resolved `pool`. + pub address: Option, + + /// The parent IP pool that a floating IP is pulled from. If unset, the + /// default pool is selected. + pub pool: Option, +} + // INSTANCES /// Describes an attachment of an `InstanceNetworkInterface` to an `Instance`, @@ -835,7 +861,11 @@ pub enum ExternalIpCreate { /// automatically-assigned from the provided IP Pool, or all available pools /// if not specified. Ephemeral { pool_name: Option }, - // TODO: Add floating IPs: https://github.com/oxidecomputer/omicron/issues/1334 + /// An IP address providing both inbound and outbound access. The address is + /// an existing Floating IP object assigned to the current project. + /// + /// The floating IP must not be in use by another instance or service. 
+ Floating { floating_ip_name: Name }, } /// Create-time parameters for an `Instance` diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 4006b18bcc..047bd71814 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -265,6 +265,22 @@ pub struct ExternalIp { pub kind: IpKind, } +/// A Floating IP is a well-known IP address which can be attached +/// and detached from instances. +#[derive(ObjectIdentity, Debug, Clone, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct FloatingIp { + #[serde(flatten)] + pub identity: IdentityMetadata, + /// The IP address held by this resource. + pub ip: IpAddr, + /// The project this resource exists within. + pub project_id: Uuid, + /// The ID of the instance that this Floating IP is attached to, + /// if it is presently in use. + pub instance_id: Option, +} + // RACKS /// View of an Rack diff --git a/openapi/nexus.json b/openapi/nexus.json index 1c7e25d004..6076663a2d 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -853,6 +853,204 @@ } } }, + "/v1/floating-ips": { + "get": { + "tags": [ + "floating-ips" + ], + "summary": "List all Floating IPs", + "operationId": "floating_ip_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIpResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [ + "project" + ] + } + }, + "post": { + "tags": [ + "floating-ips" + ], + "summary": "Create a Floating IP", + "operationId": "floating_ip_create", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIpCreate" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/floating-ips/{floating_ip}": { + "get": { + "tags": [ + "floating-ips" + ], + "summary": "Fetch a floating IP", + "operationId": "floating_ip_view", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the Floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of 
the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "floating-ips" + ], + "summary": "Delete a Floating IP", + "operationId": "floating_ip_delete", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the Floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/groups": { "get": { "tags": [ @@ -10386,6 +10584,25 @@ "required": [ "type" ] + }, + { + "description": "An IP address providing both inbound and outbound access. The address is an existing Floating IP object assigned to the current project.\n\nThe floating IP must not be in use by another instance or service.", + "type": "object", + "properties": { + "floating_ip_name": { + "$ref": "#/components/schemas/Name" + }, + "type": { + "type": "string", + "enum": [ + "floating" + ] + } + }, + "required": [ + "floating_ip_name", + "type" + ] } ] }, @@ -10470,6 +10687,116 @@ "role_name" ] }, + "FloatingIp": { + "description": "A Floating IP is a well-known IP address which can be attached and detached from instances.", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "instance_id": { + "nullable": true, + "description": "The ID of the instance that this Floating IP is attached to, if it is presently in use.", + "type": "string", + "format": "uuid" + }, + "ip": { + "description": "The IP address held by this resource.", + "type": "string", + "format": "ip" + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "project_id": { + "description": "The project this resource exists within.", + "type": "string", + "format": "uuid" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "ip", + "name", + "project_id", + "time_created", + "time_modified" + ] + }, + "FloatingIpCreate": { + "description": "Parameters for creating a new floating IP address for instances.", + "type": "object", + "properties": { + "address": { + "nullable": true, + "description": "An IP address to reserve for use as a floating IP. This field is optional: when not set, an address will be automatically chosen from `pool`. 
If set, then the IP must be available in the resolved `pool`.", + "type": "string", + "format": "ip" + }, + "description": { + "type": "string" + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "pool": { + "nullable": true, + "description": "The parent IP pool that a floating IP is pulled from. If unset, the default pool is selected.", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + }, + "required": [ + "description", + "name" + ] + }, + "FloatingIpResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/FloatingIp" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "Group": { "description": "View of a Group", "type": "object", @@ -15266,6 +15593,13 @@ "url": "http://docs.oxide.computer/api/disks" } }, + { + "name": "floating-ips", + "description": "Floating IPs allow a project to allocate well-known IPs to instances.", + "externalDocs": { + "url": "http://docs.oxide.computer/api/floating-ips" + } + }, { "name": "hidden", "description": "TODO operations that will not ship to customers", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 5e217b27a4..3a88b6cc9c 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -4252,18 +4252,23 @@ "$ref": "#/components/schemas/DiskRequest" } }, - "external_ips": { + "ephemeral_ip": { + "nullable": true, "description": "Zero or more external IP addresses (either floating or ephemeral), provided to an instance to allow inbound connectivity.", + "type": "string", + "format": "ip" + }, + "firewall_rules": { "type": "array", "items": { - "type": "string", - "format": "ip" + "$ref": "#/components/schemas/VpcFirewallRule" } }, - "firewall_rules": { + "floating_ips": { "type": "array", "items": { - "$ref": "#/components/schemas/VpcFirewallRule" + "type": "string", + "format": "ip" } }, "nics": { @@ -4282,8 +4287,8 @@ "required": [ "dhcp_config", "disks", - "external_ips", "firewall_rules", + "floating_ips", "nics", "properties", "source_nat" diff --git a/schema/crdb/19.0.0/up01.sql b/schema/crdb/19.0.0/up01.sql new file mode 100644 index 0000000000..6cfa92f4c2 --- /dev/null +++ b/schema/crdb/19.0.0/up01.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.external_ip ADD COLUMN IF NOT EXISTS project_id UUID; diff --git a/schema/crdb/19.0.0/up02.sql b/schema/crdb/19.0.0/up02.sql new file mode 100644 index 0000000000..733c46b0dc --- /dev/null +++ b/schema/crdb/19.0.0/up02.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.external_ip ADD CONSTRAINT IF NOT EXISTS null_project_id CHECK ( + (kind = 'floating' AND is_service = FALSE AND project_id IS NOT NULL) OR + ((kind != 'floating' OR is_service = TRUE) AND project_id IS NULL) +); diff --git a/schema/crdb/19.0.0/up03.sql b/schema/crdb/19.0.0/up03.sql new file mode 100644 index 0000000000..d3577edc12 --- /dev/null +++ b/schema/crdb/19.0.0/up03.sql @@ -0,0 +1,6 @@ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_floating_ip_by_name on omicron.public.external_ip ( + name +) WHERE + kind = 'floating' AND + time_deleted is NULL AND + project_id is NULL; diff --git a/schema/crdb/19.0.0/up04.sql b/schema/crdb/19.0.0/up04.sql new file mode 100644 index 0000000000..9a40dc99c5 --- /dev/null +++ b/schema/crdb/19.0.0/up04.sql @@ -0,0 +1,7 @@ +CREATE UNIQUE INDEX IF NOT 
EXISTS lookup_floating_ip_by_name_and_project on omicron.public.external_ip ( + project_id, + name +) WHERE + kind = 'floating' AND + time_deleted is NULL AND + project_id is NOT NULL; diff --git a/schema/crdb/19.0.0/up05.sql b/schema/crdb/19.0.0/up05.sql new file mode 100644 index 0000000000..3e172e3e70 --- /dev/null +++ b/schema/crdb/19.0.0/up05.sql @@ -0,0 +1,19 @@ +CREATE VIEW IF NOT EXISTS omicron.public.floating_ip AS +SELECT + id, + name, + description, + time_created, + time_modified, + time_deleted, + ip_pool_id, + ip_pool_range_id, + is_service, + parent_id, + ip, + project_id +FROM + omicron.public.external_ip +WHERE + omicron.public.external_ip.kind = 'floating' AND + project_id IS NOT NULL; diff --git a/schema/crdb/19.0.0/up06.sql b/schema/crdb/19.0.0/up06.sql new file mode 100644 index 0000000000..30c0b3773a --- /dev/null +++ b/schema/crdb/19.0.0/up06.sql @@ -0,0 +1,3 @@ +ALTER TABLE omicron.public.external_ip ADD CONSTRAINT IF NOT EXISTS null_non_fip_parent_id CHECK ( + (kind != 'floating' AND parent_id is NOT NULL) OR (kind = 'floating') +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index f82829a2d9..0bf365a2f1 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1662,6 +1662,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( /* The last port in the allowed range, also inclusive. */ last_port INT4 NOT NULL, + /* FK to the `project` table. */ + project_id UUID, + /* The name must be non-NULL iff this is a floating IP. */ CONSTRAINT null_fip_name CHECK ( (kind != 'floating' AND name IS NULL) OR @@ -1674,6 +1677,14 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( (kind = 'floating' AND description IS NOT NULL) ), + /* Only floating IPs can be attached to a project, and + * they must have a parent project if they are instance FIPs. + */ + CONSTRAINT null_project_id CHECK ( + (kind = 'floating' AND is_service = FALSE AND project_id is NOT NULL) OR + ((kind != 'floating' OR is_service = TRUE) AND project_id IS NULL) + ), + /* * Only nullable if this is a floating IP, which may exist not * attached to any instance or service yet. @@ -1717,6 +1728,43 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_external_ip_by_parent ON omicron.public ) WHERE parent_id IS NOT NULL AND time_deleted IS NULL; +/* Enforce name-uniqueness of floating (service) IPs at fleet level. */ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_floating_ip_by_name on omicron.public.external_ip ( + name +) WHERE + kind = 'floating' AND + time_deleted is NULL AND + project_id is NULL; + +/* Enforce name-uniqueness of floating IPs at project level. 
*/ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_floating_ip_by_name_and_project on omicron.public.external_ip ( + project_id, + name +) WHERE + kind = 'floating' AND + time_deleted is NULL AND + project_id is NOT NULL; + +CREATE VIEW IF NOT EXISTS omicron.public.floating_ip AS +SELECT + id, + name, + description, + time_created, + time_modified, + time_deleted, + ip_pool_id, + ip_pool_range_id, + is_service, + parent_id, + ip, + project_id +FROM + omicron.public.external_ip +WHERE + omicron.public.external_ip.kind = 'floating' AND + project_id IS NOT NULL; + /*******************************************************************/ /* @@ -3014,7 +3062,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '18.0.0', NULL) + ( TRUE, NOW(), NOW(), '19.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index c37f0ffde6..a811678a48 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -208,7 +208,8 @@ struct InstanceInner { // Guest NIC and OPTE port information requested_nics: Vec, source_nat: SourceNatConfig, - external_ips: Vec, + ephemeral_ip: Option, + floating_ips: Vec, firewall_rules: Vec, dhcp_config: DhcpCfg, @@ -669,7 +670,8 @@ impl Instance { port_manager, requested_nics: hardware.nics, source_nat: hardware.source_nat, - external_ips: hardware.external_ips, + ephemeral_ip: hardware.ephemeral_ip, + floating_ips: hardware.floating_ips, firewall_rules: hardware.firewall_rules, dhcp_config, requested_disks: hardware.disks, @@ -882,15 +884,20 @@ impl Instance { // Create OPTE ports for the instance let mut opte_ports = Vec::with_capacity(inner.requested_nics.len()); for nic in inner.requested_nics.iter() { - let (snat, external_ips) = if nic.primary { - (Some(inner.source_nat), &inner.external_ips[..]) + let (snat, ephemeral_ip, floating_ips) = if nic.primary { + ( + Some(inner.source_nat), + inner.ephemeral_ip, + &inner.floating_ips[..], + ) } else { - (None, &[][..]) + (None, None, &[][..]) }; let port = inner.port_manager.create_port( nic, snat, - external_ips, + ephemeral_ip, + floating_ips, &inner.firewall_rules, inner.dhcp_config.clone(), )?; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 6be2ceabbd..a7d91e2b93 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -70,7 +70,8 @@ pub struct InstanceHardware { pub source_nat: SourceNatConfig, /// Zero or more external IP addresses (either floating or ephemeral), /// provided to an instance to allow inbound connectivity. - pub external_ips: Vec, + pub ephemeral_ip: Option, + pub floating_ips: Vec, pub firewall_rules: Vec, pub dhcp_config: DhcpConfig, // TODO: replace `propolis_client::*` with locally-modeled request type diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index dc309e8423..fb6de8d38a 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1167,7 +1167,7 @@ impl ServiceManager { .collect(); let external_ip; - let (zone_type_str, nic, snat, external_ips) = match &zone_args + let (zone_type_str, nic, snat, floating_ips) = match &zone_args .omicron_type() { Some( @@ -1207,16 +1207,18 @@ impl ServiceManager { // config allows outbound access which is enough for // Boundary NTP which needs to come up before Nexus. 
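            // (Service zones are never assigned an ephemeral address, which
            // is why `None` is passed for the new ephemeral IP argument of
            // `create_port` below.)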
let port = port_manager - .create_port(nic, snat, external_ips, &[], DhcpCfg::default()) + .create_port(nic, snat, None, floating_ips, &[], DhcpCfg::default()) .map_err(|err| Error::ServicePortCreation { service: zone_type_str.clone(), err: Box::new(err), })?; // We also need to update the switch with the NAT mappings + // XXX: need to revisit iff. any services get more than one + // address. let (target_ip, first_port, last_port) = match snat { Some(s) => (s.ip, s.first_port, s.last_port), - None => (external_ips[0], 0, u16::MAX), + None => (floating_ips[0], 0, u16::MAX), }; for dpd_client in &dpd_clients { diff --git a/tools/opte_version b/tools/opte_version index 0a79a6aba9..fa0ef8d768 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.25.183 +0.27.199 From c5811635125d50147ce353897e6e9225a1745df1 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 6 Dec 2023 15:59:41 -0800 Subject: [PATCH 059/186] add a helper for querying the database in batches (#4632) --- nexus/db-queries/src/db/datastore/dns.rs | 29 ++-- nexus/db-queries/src/db/mod.rs | 3 +- nexus/db-queries/src/db/pagination.rs | 184 +++++++++++++++++++++++ 3 files changed, 196 insertions(+), 20 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index cfd25d6a4f..552ad31487 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -15,6 +15,7 @@ use crate::db::model::DnsZone; use crate::db::model::Generation; use crate::db::model::InitialDnsGroup; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::TransactionError; use async_bb8_diesel::AsyncConnection; @@ -242,9 +243,8 @@ impl DataStore { let mut zones = Vec::with_capacity(dns_zones.len()); for zone in dns_zones { let mut zone_records = Vec::new(); - let mut marker = None; - - loop { + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { debug!(log, "listing DNS names for zone"; "dns_zone_id" => zone.id.to_string(), "dns_zone_name" => &zone.zone_name, @@ -252,25 +252,16 @@ impl DataStore { "found_so_far" => zone_records.len(), "batch_size" => batch_size.get(), ); - let pagparams = DataPageParams { - marker: marker.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: batch_size, - }; let names_batch = self - .dns_names_list(opctx, zone.id, version.version, &pagparams) + .dns_names_list( + opctx, + zone.id, + version.version, + &p.current_pagparams(), + ) .await?; - let done = names_batch.len() - < usize::try_from(batch_size.get()).unwrap(); - if let Some((last_name, _)) = names_batch.last() { - marker = Some(last_name.clone()); - } else { - assert!(done); - } + paginator = p.found_batch(&names_batch, &|(n, _)| n.clone()); zone_records.extend(names_batch.into_iter()); - if done { - break; - } } debug!(log, "found all DNS names for zone"; diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index e6b8743e94..924eab363f 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -21,7 +21,8 @@ pub(crate) mod error; mod explain; pub mod fixed_data; pub mod lookup; -mod pagination; +// Public for doctests. +pub mod pagination; mod pool; // This is marked public because the error types are used elsewhere, e.g., in // sagas. 
diff --git a/nexus/db-queries/src/db/pagination.rs b/nexus/db-queries/src/db/pagination.rs
index dd7daab14f..4fc1cf5966 100644
--- a/nexus/db-queries/src/db/pagination.rs
+++ b/nexus/db-queries/src/db/pagination.rs
@@ -16,6 +16,7 @@ use diesel::AppearsOnTable;
 use diesel::Column;
 use diesel::{ExpressionMethods, QueryDsl};
 use omicron_common::api::external::DataPageParams;
+use std::num::NonZeroU32;
 
 // Shorthand alias for "the SQL type of the whole table".
 type TableSqlType = ::SqlType;
@@ -169,6 +170,145 @@ where
     }
 }
 
+/// Helper for querying a large number of records from the database in batches
+///
+/// Without this helper, a typical way to perform paginated queries would be to
+/// invoke some existing "list" function in the datastore that itself is
+/// paginated.  Such functions accept a `pagparams: &DataPageParams` argument
+/// that uses a marker to identify where the next page of results starts.  For
+/// the first call, the marker inside `pagparams` is `None`.  For subsequent
+/// calls, it's typically some field from the last item returned in the
+/// previous page.  You're finished when you get a result set smaller than the
+/// batch size.
+///
+/// This helper takes care of most of the logic for you.  To use this, you
+/// first create a `Paginator` with a specific batch_size.  Then you call
+/// `next()` in a loop.  Each iteration will provide you with a
+/// `DataPageParams` to use to call your list function.  When you've fetched
+/// the next page, you have to let the helper look at it to determine if
+/// there's another page to fetch and what marker to use.
+///
+/// ## Example
+///
+/// ```
+/// use nexus_db_queries::db::pagination::Paginator;
+/// use omicron_common::api::external::DataPageParams;
+///
+/// let batch_size = std::num::NonZeroU32::new(3).unwrap();
+///
+/// // Assume you've got an existing paginated "list items" function.
+/// // This simple implementation returns a few full batches, then a partial
+/// // batch.
+/// type Marker = u32;
+/// type Item = u32;
+/// let do_query = |pagparams: &DataPageParams<'_, Marker> | {
+///     match pagparams.marker {
+///         None => (0..batch_size.get()).collect(),
+///         Some(x) if *x < 2 * batch_size.get() => (x+1..x+1+batch_size.get()).collect(),
+///         Some(x) => vec![*x + 1],
+///     }
+/// };
+///
+/// // This closure translates from one of the returned items to the field in
+/// // that item that serves as the marker.  This example is contrived.
+/// let item2marker: &dyn Fn(&Item) -> Marker = &|u: &u32| *u;
+///
+/// let mut all_records = Vec::new();
+/// let mut paginator = Paginator::new(batch_size);
+/// while let Some(p) = paginator.next() {
+///     let records_batch = do_query(&p.current_pagparams());
+///     paginator = p.found_batch(&records_batch, item2marker);
+///     all_records.extend(records_batch.into_iter());
+/// }
+///
+/// // Results are in `all_records`.
+/// assert_eq!(all_records, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+/// ```
+///
+/// ## Design notes
+///
+/// The separation of `Paginator` and `PaginatorHelper` is aimed at making it
+/// harder to misuse this interface.  We could skip the helper altogether and
+/// just have `Paginator::next()` return the DataPageParams directly.  But
+/// you'd still need a `Paginator::found_batch()`.  And it would be easy to
+/// forget to call this, leading to an infinite loop at runtime.  To avoid
+/// this mistake, `Paginator::next()` consumes `self`.  You can't get another
+/// `Paginator` back until you use `PaginatorHelper::found_batch()`.  That
+/// also consumes `self` so that you can't keep using the old
+/// `DataPageParams`.
+pub struct Paginator<N> {
+    batch_size: NonZeroU32,
+    state: PaginatorState<N>,
+}
+
+impl<N> Paginator<N> {
+    pub fn new(batch_size: NonZeroU32) -> Paginator<N> {
+        Paginator { batch_size, state: PaginatorState::Initial }
+    }
+
+    pub fn next(self) -> Option<PaginatorHelper<N>> {
+        match self.state {
+            PaginatorState::Initial => Some(PaginatorHelper {
+                batch_size: self.batch_size,
+                marker: None,
+            }),
+            PaginatorState::Middle { marker } => Some(PaginatorHelper {
+                batch_size: self.batch_size,
+                marker: Some(marker),
+            }),
+            PaginatorState::Done => None,
+        }
+    }
+}
+
+enum PaginatorState<N> {
+    Initial,
+    Middle { marker: N },
+    Done,
+}
+
+pub struct PaginatorHelper<N> {
+    batch_size: NonZeroU32,
+    marker: Option<N>,
+}
+
+impl<N> PaginatorHelper<N> {
+    /// Returns the `DataPageParams` to use to fetch the next page of results
+    pub fn current_pagparams(&self) -> DataPageParams<'_, N> {
+        DataPageParams {
+            marker: self.marker.as_ref(),
+            direction: dropshot::PaginationOrder::Ascending,
+            limit: self.batch_size,
+        }
+    }
+
+    /// Report a page of results
+    ///
+    /// This function looks at the returned results to determine whether we've
+    /// finished iteration or whether we need to fetch another page (and if
+    /// so, this determines the marker for the next fetch operation).
+    ///
+    /// This function returns a `Paginator` used to make the next request.
+    /// See the example on `Paginator` for usage.
+    pub fn found_batch<T>(
+        self,
+        batch: &[T],
+        item2marker: &dyn Fn(&T) -> N,
+    ) -> Paginator<N> {
+        let state =
+            if batch.len() < usize::try_from(self.batch_size.get()).unwrap() {
+                PaginatorState::Done
+            } else {
+                // self.batch_size is non-zero, so if we got at least that
+                // many items, then there's at least one.
+                let last = batch.iter().last().unwrap();
+                let marker = item2marker(last);
+                PaginatorState::Middle { marker }
+            };
+
+        Paginator { batch_size: self.batch_size, state }
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -433,4 +573,48 @@ mod test {
         let _ = db.cleanup().await;
         logctx.cleanup_successful();
     }
+
+    #[test]
+    fn test_paginator() {
+        // The doctest exercises a basic case for Paginator.  Here we test
+        // some edge cases.
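+        // (Covered below: an empty first page, and a result set that ends
+        // exactly on a batch boundary, where the final fetch returns an
+        // empty page.)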
+ let batch_size = std::num::NonZeroU32::new(3).unwrap(); + + type Marker = u32; + #[derive(Debug, PartialEq, Eq)] + struct Item { + value: String, + marker: Marker, + } + + let do_list = + |query: &dyn Fn(&DataPageParams<'_, Marker>) -> Vec| { + let mut all_records = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let records_batch = query(&p.current_pagparams()); + paginator = + p.found_batch(&records_batch, &|i: &Item| i.marker); + all_records.extend(records_batch.into_iter()); + } + all_records + }; + + fn mkitem(v: u32) -> Item { + Item { value: v.to_string(), marker: v } + } + + // Trivial case: first page is empty + assert_eq!(Vec::::new(), do_list(&|_| Vec::new())); + + // Exactly one batch-size worth of items + // (exercises the cases where the last non-empty batch is full, and + // where any batch is empty) + let my_query = + |pagparams: &DataPageParams<'_, Marker>| match &pagparams.marker { + None => (0..batch_size.get()).map(mkitem).collect(), + Some(_) => Vec::new(), + }; + assert_eq!(vec![mkitem(0), mkitem(1), mkitem(2)], do_list(&my_query)); + } } From e0882aeaf1b5d5434f3cd46246c31260d8f6ae88 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 6 Dec 2023 17:26:18 -0800 Subject: [PATCH 060/186] [sled-agent] address EarlyNetworkConfig issues from 20231130 dogfood mupdate (#4636) * Log deserialization as both v1 and v0 failing. * If an error occurs with deserialization as both v1 and v0, then preferentially return the error produced by v1 rather than v0. * Add tests for bootstore blobs from earlier versions. If more fields are added in the future, this test should catch any issues that occur. --- sled-agent/src/bootstrap/early_networking.rs | 104 ++++++++---- sled-agent/src/http_entrypoints.rs | 5 +- sled-agent/src/sled_agent.rs | 7 +- sled-agent/tests/data/early_network_blobs.txt | 2 + .../tests/integration_tests/early_network.rs | 154 ++++++++++++++++++ sled-agent/tests/integration_tests/mod.rs | 1 + 6 files changed, 236 insertions(+), 37 deletions(-) create mode 100644 sled-agent/tests/data/early_network_blobs.txt create mode 100644 sled-agent/tests/integration_tests/early_network.rs diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index cb411a2546..4216a418c6 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -682,6 +682,65 @@ pub struct EarlyNetworkConfig { pub body: EarlyNetworkConfigBody, } +impl EarlyNetworkConfig { + // Note: This currently only converts between v0 and v1 or deserializes v1 of + // `EarlyNetworkConfig`. + pub fn deserialize_bootstore_config( + log: &Logger, + config: &bootstore::NetworkConfig, + ) -> Result { + // Try to deserialize the latest version of the data structure (v1). If + // that succeeds we are done. + let v1_error = + match serde_json::from_slice::(&config.blob) { + Ok(val) => return Ok(val), + Err(error) => { + // Log this error and continue trying to deserialize older + // versions. 
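+                    // (v1 is attempted first; if the v0 fallback below also
+                    // fails, the v1 error is the one returned, as it is more
+                    // likely to be useful.)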
+ warn!( + log, + "Failed to deserialize EarlyNetworkConfig \ + as v1, trying next as v0: {}", + error, + ); + error + } + }; + + match serde_json::from_slice::(&config.blob) { + Ok(val) => { + // Convert from v0 to v1 + return Ok(EarlyNetworkConfig { + generation: val.generation, + schema_version: 1, + body: EarlyNetworkConfigBody { + ntp_servers: val.ntp_servers, + rack_network_config: val.rack_network_config.map( + |v0_config| { + RackNetworkConfigV0::to_v1( + val.rack_subnet, + v0_config, + ) + }, + ), + }, + }); + } + Err(error) => { + // Log this error. + warn!( + log, + "Failed to deserialize EarlyNetworkConfig as v0: {}", error, + ); + } + }; + + // Return the v1 error preferentially over the v0 error as it's more + // likely to be useful. + Err(v1_error) + } +} + /// This is the actual configuration of EarlyNetworking. /// /// We nest it below the "header" of `generation` and `schema_version` so that @@ -711,39 +770,6 @@ impl From for bootstore::NetworkConfig { } } -// Note: This currently only converts between v0 and v1 or deserializes v1 of -// `EarlyNetworkConfig`. -impl TryFrom for EarlyNetworkConfig { - type Error = serde_json::Error; - - fn try_from( - value: bootstore::NetworkConfig, - ) -> std::result::Result { - // Try to deserialize the latest version of the data structure (v1). If - // that succeeds we are done. - if let Ok(val) = - serde_json::from_slice::(&value.blob) - { - return Ok(val); - } - - // We don't have the latest version. Try to deserialize v0 and then - // convert it to the latest version. - let v0 = serde_json::from_slice::(&value.blob)?; - - Ok(EarlyNetworkConfig { - generation: v0.generation, - schema_version: 1, - body: EarlyNetworkConfigBody { - ntp_servers: v0.ntp_servers, - rack_network_config: v0.rack_network_config.map(|v0_config| { - RackNetworkConfigV0::to_v1(v0.rack_subnet, v0_config) - }), - }, - }) - } -} - /// Deprecated, use `RackNetworkConfig` instead. 
Cannot actually deprecate due to /// /// @@ -815,9 +841,13 @@ fn convert_fec(fec: &PortFec) -> dpd_client::types::PortFec { mod tests { use super::*; use omicron_common::api::internal::shared::RouteConfig; + use omicron_test_utils::dev::test_setup_log; #[test] fn serialized_early_network_config_v0_to_v1_conversion() { + let logctx = test_setup_log( + "serialized_early_network_config_v0_to_v1_conversion", + ); let v0 = EarlyNetworkConfigV0 { generation: 1, rack_subnet: Ipv6Addr::UNSPECIFIED, @@ -841,7 +871,11 @@ mod tests { let bootstore_conf = bootstore::NetworkConfig { generation: 1, blob: v0_serialized }; - let v1 = EarlyNetworkConfig::try_from(bootstore_conf).unwrap(); + let v1 = EarlyNetworkConfig::deserialize_bootstore_config( + &logctx.log, + &bootstore_conf, + ) + .unwrap(); let v0_rack_network_config = v0.rack_network_config.unwrap(); let uplink = v0_rack_network_config.uplinks[0].clone(); let expected = EarlyNetworkConfig { @@ -872,5 +906,7 @@ mod tests { }; assert_eq!(expected, v1); + + logctx.cleanup_successful(); } } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 9c3a079dac..2dcb35b77e 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -659,7 +659,10 @@ async fn read_network_bootstore_config_cache( })?; let config = match config { - Some(config) => EarlyNetworkConfig::try_from(config).map_err(|e| { + Some(config) => EarlyNetworkConfig::deserialize_bootstore_config( + &rqctx.log, &config, + ) + .map_err(|e| { HttpError::for_internal_error(format!( "deserialize early network config: {e}" )) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 90e9706198..57aea61ae9 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -445,8 +445,11 @@ impl SledAgent { })?; let early_network_config = - EarlyNetworkConfig::try_from(serialized_config) - .map_err(|err| BackoffError::transient(err.to_string()))?; + EarlyNetworkConfig::deserialize_bootstore_config( + &log, + &serialized_config, + ) + .map_err(|err| BackoffError::transient(err.to_string()))?; Ok(early_network_config.body.rack_network_config) }; diff --git a/sled-agent/tests/data/early_network_blobs.txt b/sled-agent/tests/data/early_network_blobs.txt new file mode 100644 index 0000000000..c968d4010b --- /dev/null +++ b/sled-agent/tests/data/early_network_blobs.txt @@ -0,0 +1,2 @@ +2023-11-30 mupdate failing 
blob,{"generation":15,"schema_version":1,"body":{"ntp_servers":[],"rack_network_config":{"rack_subnet":"fd00:1122:3344:100::/56","infra_ip_first":"0.0.0.0","infra_ip_last":"0.0.0.0","ports":[{"routes":[],"addresses":[],"switch":"switch1","port":"qsfp0","uplink_port_speed":"speed100_g","uplink_port_fec":"none","bgp_peers":[]},{"routes":[],"addresses":["172.20.15.53/29"],"switch":"switch1","port":"qsfp18","uplink_port_speed":"speed100_g","uplink_port_fec":"rs","bgp_peers":[{"asn":65002,"port":"qsfp18","addr":"172.20.15.51","hold_time":6,"idle_hold_time":6,"delay_open":0,"connect_retry":3,"keepalive":2}]},{"routes":[],"addresses":["172.20.15.45/29"],"switch":"switch0","port":"qsfp18","uplink_port_speed":"speed100_g","uplink_port_fec":"rs","bgp_peers":[{"asn":65002,"port":"qsfp18","addr":"172.20.15.43","hold_time":6,"idle_hold_time":6,"delay_open":0,"connect_retry":3,"keepalive":2}]},{"routes":[],"addresses":[],"switch":"switch0","port":"qsfp0","uplink_port_speed":"speed100_g","uplink_port_fec":"none","bgp_peers":[]}],"bgp":[{"asn":65002,"originate":["172.20.26.0/24"]},{"asn":65002,"originate":["172.20.26.0/24"]}]}}} +2023-12-06 config,{"generation":20,"schema_version":1,"body":{"ntp_servers":["ntp.example.com"],"rack_network_config":{"rack_subnet":"ff01::/32","infra_ip_first":"127.0.0.1","infra_ip_last":"127.1.0.1","ports":[{"routes":[{"destination":"10.1.9.32/16","nexthop":"10.1.9.32"}],"addresses":["2001:db8::/96"],"switch":"switch0","port":"foo","uplink_port_speed":"speed200_g","uplink_port_fec":"firecode","bgp_peers":[{"asn":65000,"port":"bar","addr":"1.2.3.4","hold_time":20,"idle_hold_time":50,"delay_open":null,"connect_retry":30,"keepalive":10}],"autoneg":true}],"bgp":[{"asn":20000,"originate":["192.168.0.0/24"]}]}}} diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs new file mode 100644 index 0000000000..c3a4a53ebf --- /dev/null +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -0,0 +1,154 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests that EarlyNetworkConfig deserializes across versions. + +use std::net::Ipv4Addr; + +use bootstore::schemes::v0 as bootstore; +use omicron_common::api::{ + external::SwitchLocation, + internal::shared::{ + BgpConfig, BgpPeerConfig, PortConfigV1, PortFec, PortSpeed, + RackNetworkConfig, RouteConfig, + }, +}; +use omicron_sled_agent::bootstrap::early_networking::{ + EarlyNetworkConfig, EarlyNetworkConfigBody, +}; +use omicron_test_utils::dev::test_setup_log; + +const BLOB_PATH: &str = "tests/data/early_network_blobs.txt"; + +/// Test that previous and current versions of `EarlyNetworkConfig` blobs +/// deserialize correctly. +#[test] +fn early_network_blobs_deserialize() { + let logctx = test_setup_log("early_network_blobs_deserialize"); + + let (current_desc, current_config) = current_config_example(); + assert!( + !current_desc.contains(',') && !current_desc.contains('\n'), + "current_desc must not contain commas or newlines" + ); + + // Read old blobs as newline-delimited JSON. 
+ let mut known_blobs = std::fs::read_to_string(BLOB_PATH) + .expect("error reading early_network_blobs.txt"); + let mut current_blob_is_known = false; + for (blob_idx, line) in known_blobs.lines().enumerate() { + let blob_lineno = blob_idx + 1; + let (blob_desc, blob_json) = + line.split_once(',').unwrap_or_else(|| { + panic!( + "error parsing early_network_blobs.txt \ + line {blob_lineno}: missing comma", + ); + }); + + // Attempt to deserialize this blob. + let config = serde_json::from_str::(blob_json) + .unwrap_or_else(|error| { + panic!( + "error deserializing early_network_blobs.txt \ + \"{blob_desc}\" (line {blob_lineno}): {error}", + ); + }); + + // Does this config match the current config? + if blob_desc == current_desc { + assert_eq!( + config, current_config, + "early_network_blobs.txt line {}: {} does not match current config", + blob_lineno, blob_desc + ); + current_blob_is_known = true; + } + + // Now attempt to put this blob into a bootstore config, and deserialize that. + let network_config = bootstore::NetworkConfig { + generation: config.generation, + blob: blob_json.to_owned().into(), + }; + let config2 = EarlyNetworkConfig::deserialize_bootstore_config( + &logctx.log, + &network_config, + ).unwrap_or_else(|error| { + panic!( + "error deserializing early_network_blobs.txt \ + \"{blob_desc}\" (line {blob_lineno}) as bootstore config: {error}", + ); + }); + + assert_eq!( + config, config2, + "early_network_blobs.txt line {}: {} does not match deserialization \ + as bootstore config", + blob_lineno, blob_desc + ); + } + + // If the current blob was not covered, add it to the list of known blobs. + if !current_blob_is_known { + let current_blob_json = serde_json::to_string(¤t_config).unwrap(); + let current_blob = format!("{},{}", current_desc, current_blob_json); + known_blobs.push_str(¤t_blob); + known_blobs.push('\n'); + } + + expectorate::assert_contents(BLOB_PATH, &known_blobs); + + logctx.cleanup_successful(); +} + +/// Returns a current version of the EarlyNetworkConfig blob, along with a +/// short description of the current version. The values can be arbitrary, but +/// this should be a nontrivial blob where no vectors are empty. +/// +/// The goal is that if the definition of `EarlyNetworkConfig` changes in the +/// future, older blobs can still be deserialized correctly. +fn current_config_example() -> (&'static str, EarlyNetworkConfig) { + // NOTE: the description must not contain commas or newlines. 
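+    // (The blobs file stores one `description,json` record per line, so a
+    // comma or newline in the description would corrupt the record.)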
+ let description = "2023-12-06 config"; + let config = EarlyNetworkConfig { + generation: 20, + schema_version: 1, + body: EarlyNetworkConfigBody { + ntp_servers: vec!["ntp.example.com".to_owned()], + rack_network_config: Some(RackNetworkConfig { + rack_subnet: "ff01::0/32".parse().unwrap(), + infra_ip_first: Ipv4Addr::new(127, 0, 0, 1), + infra_ip_last: Ipv4Addr::new(127, 1, 0, 1), + ports: vec![PortConfigV1 { + routes: vec![RouteConfig { + destination: "10.1.9.32/16".parse().unwrap(), + nexthop: "10.1.9.32".parse().unwrap(), + }], + addresses: vec!["2001:db8::/96".parse().unwrap()], + switch: SwitchLocation::Switch0, + port: "foo".to_owned(), + uplink_port_speed: PortSpeed::Speed200G, + uplink_port_fec: PortFec::Firecode, + bgp_peers: vec![BgpPeerConfig { + asn: 65000, + port: "bar".to_owned(), + addr: Ipv4Addr::new(1, 2, 3, 4), + hold_time: Some(20), + idle_hold_time: Some(50), + delay_open: None, + connect_retry: Some(30), + keepalive: Some(10), + }], + autoneg: true, + }], + bgp: vec![BgpConfig { + asn: 20000, + originate: vec!["192.168.0.0/24".parse().unwrap()], + }], + }), + }, + }; + + (description, config) +} diff --git a/sled-agent/tests/integration_tests/mod.rs b/sled-agent/tests/integration_tests/mod.rs index 1bf43dc00c..13e38077ea 100644 --- a/sled-agent/tests/integration_tests/mod.rs +++ b/sled-agent/tests/integration_tests/mod.rs @@ -3,3 +3,4 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. mod commands; +mod early_network; From 7f9d64deb102a3d3878c8040e43bac0ca0eba567 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 6 Dec 2023 17:37:49 -0800 Subject: [PATCH 061/186] API updates for oxidecomputer/maghemite#88 fix (#4620) --- .../app/sagas/switch_port_settings_common.rs | 79 ++++++++++------ nexus/src/app/switch_port.rs | 11 ++- package-manifest.toml | 8 +- sled-agent/src/bootstrap/early_networking.rs | 93 ++++++++++++------- tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- 7 files changed, 127 insertions(+), 74 deletions(-) diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs index 9132645782..9ef23ebf44 100644 --- a/nexus/src/app/sagas/switch_port_settings_common.rs +++ b/nexus/src/app/sagas/switch_port_settings_common.rs @@ -444,7 +444,9 @@ pub(crate) async fn ensure_switch_port_bgp_settings( |e| ActionError::action_failed(format!("select mg client: {e}")), )?; - let mut bgp_peer_configs = Vec::new(); + let mut bgp_peer_configs = HashMap::>::new(); + + let mut cfg: Option = None; for peer in settings.bgp_peers { let config = nexus @@ -454,11 +456,44 @@ pub(crate) async fn ensure_switch_port_bgp_settings( ActionError::action_failed(format!("get bgp config: {e}")) })?; + if let Some(cfg) = &cfg { + if config.asn != cfg.asn { + return Err(ActionError::action_failed( + "bad request: only one AS allowed per switch".to_string(), + )); + } + } else { + cfg = Some(config); + } + + let bpc = BgpPeerConfig { + name: format!("{}", peer.addr.ip()), //TODO user defined name? 
+ host: format!("{}:179", peer.addr.ip()), + hold_time: peer.hold_time.0.into(), + idle_hold_time: peer.idle_hold_time.0.into(), + delay_open: peer.delay_open.0.into(), + connect_retry: peer.connect_retry.0.into(), + keepalive: peer.keepalive.0.into(), + resolution: BGP_SESSION_RESOLUTION, + passive: false, + }; + + match bgp_peer_configs.get_mut(&switch_port_name) { + Some(peers) => { + peers.push(bpc); + } + None => { + bgp_peer_configs.insert(switch_port_name.clone(), vec![bpc]); + } + } + } + + if let Some(cfg) = &cfg { let announcements = nexus .bgp_announce_list( &opctx, ¶ms::BgpAnnounceSetSelector { - name_or_id: NameOrId::Id(config.bgp_announce_set_id), + name_or_id: NameOrId::Id(cfg.bgp_announce_set_id), }, ) .await @@ -473,39 +508,25 @@ pub(crate) async fn ensure_switch_port_bgp_settings( let value = match a.network.ip() { IpAddr::V4(value) => Ok(value), IpAddr::V6(_) => Err(ActionError::action_failed( - "IPv6 announcement not yet supported".to_string(), + "bad request: IPv6 announcement not yet supported" + .to_string(), )), }?; prefixes.push(Prefix4 { value, length: a.network.prefix() }); } - - let bpc = BgpPeerConfig { - asn: *config.asn, - name: format!("{}", peer.addr.ip()), //TODO user defined name? - host: format!("{}:179", peer.addr.ip()), - hold_time: peer.hold_time.0.into(), - idle_hold_time: peer.idle_hold_time.0.into(), - delay_open: peer.delay_open.0.into(), - connect_retry: peer.connect_retry.0.into(), - keepalive: peer.keepalive.0.into(), - resolution: BGP_SESSION_RESOLUTION, - originate: prefixes, - }; - - bgp_peer_configs.push(bpc); + mg_client + .inner + .bgp_apply(&ApplyRequest { + asn: cfg.asn.0, + peers: bgp_peer_configs, + originate: prefixes, + }) + .await + .map_err(|e| { + ActionError::action_failed(format!("apply bgp settings: {e}")) + })?; } - mg_client - .inner - .bgp_apply(&ApplyRequest { - peer_group: switch_port_name, - peers: bgp_peer_configs, - }) - .await - .map_err(|e| { - ActionError::action_failed(format!("apply bgp settings: {e}")) - })?; - Ok(()) } diff --git a/nexus/src/app/switch_port.rs b/nexus/src/app/switch_port.rs index acc57459fd..b9f0f94fa0 100644 --- a/nexus/src/app/switch_port.rs +++ b/nexus/src/app/switch_port.rs @@ -117,7 +117,6 @@ impl super::Nexus { .map_err(|e| { let msg = e.to_string(); if msg.contains("bad request") { - //return HttpError::for_client_error(None, StatusCode::BAD_REQUEST, msg.to_string()) external::Error::invalid_request(&msg.to_string()) } else { e @@ -255,7 +254,15 @@ impl super::Nexus { >( saga_params, ) - .await?; + .await + .map_err(|e| { + let msg = e.to_string(); + if msg.contains("bad request") { + external::Error::invalid_request(&msg.to_string()) + } else { + e + } + })?; Ok(()) } diff --git a/package-manifest.toml b/package-manifest.toml index 37ae1100f8..49f202089c 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -425,7 +425,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" +source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt source.sha256 = "38851c79c85d53e997db748520fb27c82299ce7e58a550e35646a548498f1271" @@ -441,7 +441,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. 
Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" +source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt source.sha256 = "8cd94e9a6f6175081ce78f0281085a08a5306cde453d8e21deb28050945b1d88" @@ -456,10 +456,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" +source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "82aa1ca1d7701b2221c442d58f912be59798258d574effcb866ffab22753cf38" +source.sha256 = "802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 4216a418c6..75958a2f37 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -22,8 +22,8 @@ use mg_admin_client::Client as MgdClient; use omicron_common::address::{Ipv6Subnet, MGD_PORT, MGS_PORT}; use omicron_common::address::{DDMD_PORT, DENDRITE_PORT}; use omicron_common::api::internal::shared::{ - PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, RackNetworkConfigV1, - SwitchLocation, UplinkConfig, + BgpConfig, PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, + RackNetworkConfigV1, SwitchLocation, UplinkConfig, }; use omicron_common::backoff::{ retry_notify, retry_policy_local, BackoffError, ExponentialBackoff, @@ -472,23 +472,37 @@ impl<'a> EarlyNetworkSetup<'a> { )) })?; + let mut config: Option = None; + let mut bgp_peer_configs = HashMap::>::new(); + // Iterate through ports and apply BGP config. for port in &our_ports { - let mut bgp_peer_configs = Vec::new(); for peer in &port.bgp_peers { - let config = rack_network_config - .bgp - .iter() - .find(|x| x.asn == peer.asn) - .ok_or(EarlyNetworkSetupError::BgpConfigurationError( - format!( - "asn {} referenced by peer undefined", - peer.asn - ), - ))?; + if let Some(config) = &config { + if peer.asn != config.asn { + return Err(EarlyNetworkSetupError::BadConfig( + "only one ASN per switch is supported".into(), + )); + } + } else { + config = Some( + rack_network_config + .bgp + .iter() + .find(|x| x.asn == peer.asn) + .ok_or( + EarlyNetworkSetupError::BgpConfigurationError( + format!( + "asn {} referenced by peer undefined", + peer.asn + ), + ), + )? 
+ .clone(), + ); + } let bpc = BgpPeerConfig { - asn: peer.asn, name: format!("{}", peer.addr), host: format!("{}:179", peer.addr), hold_time: peer.hold_time.unwrap_or(6), @@ -497,30 +511,41 @@ impl<'a> EarlyNetworkSetup<'a> { connect_retry: peer.connect_retry.unwrap_or(3), keepalive: peer.keepalive.unwrap_or(2), resolution: BGP_SESSION_RESOLUTION, - originate: config - .originate - .iter() - .map(|x| Prefix4 { length: x.prefix(), value: x.ip() }) - .collect(), + passive: false, }; - bgp_peer_configs.push(bpc); + match bgp_peer_configs.get_mut(&port.port) { + Some(peers) => { + peers.push(bpc); + } + None => { + bgp_peer_configs.insert(port.port.clone(), vec![bpc]); + } + } } + } - if bgp_peer_configs.is_empty() { - continue; + if !bgp_peer_configs.is_empty() { + if let Some(config) = &config { + mgd.inner + .bgp_apply(&ApplyRequest { + asn: config.asn, + peers: bgp_peer_configs, + originate: config + .originate + .iter() + .map(|x| Prefix4 { + length: x.prefix(), + value: x.ip(), + }) + .collect(), + }) + .await + .map_err(|e| { + EarlyNetworkSetupError::BgpConfigurationError(format!( + "BGP peer configuration failed: {e}", + )) + })?; } - - mgd.inner - .bgp_apply(&ApplyRequest { - peer_group: port.port.clone(), - peers: bgp_peer_configs, - }) - .await - .map_err(|e| { - EarlyNetworkSetupError::BgpConfigurationError(format!( - "BGP peer configuration failed: {e}", - )) - })?; } Ok(our_ports) diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index f60ea76380..37c099d7f5 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="579592bf474ec4b86805ada60c1b920b3beef5a7" +COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3" SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 649db53f6e..329c05fc42 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="579592bf474ec4b86805ada60c1b920b3beef5a7" -SHA2="6c1fab8d5028b52a161d8bf02aae47844699cdc5f7b28e1ac519fc4ec1ab3971" +COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3" +SHA2="931efa310d972b1f8afba2308751fc6a2035afbaebba77b3a40a8358c123ba3c" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 08b04d6b67..1d3cf98f94 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="82aa1ca1d7701b2221c442d58f912be59798258d574effcb866ffab22753cf38" -MGD_LINUX_SHA256="81231b30872fa1c581aa22c101f32d11f33f335758ac1fd2653436fbc7aab93f" \ No newline at end of file +CIDL_SHA256="802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890" +MGD_LINUX_SHA256="1bcadfd700902e3640843e0bb53d3defdbcd8d86c3279efa0953ae8d6437e2b0" \ No newline at end of file From 75cd9019387edb4a5fcd78015433938513ed51dc Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:39:38 -0800 Subject: [PATCH 062/186] Update russh monorepo to 0.40.0 (#4624) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 84 +++++++++++++++++++++++++++++++++---- end-to-end-tests/Cargo.toml | 4 +- workspace-hack/Cargo.toml | 16 ++++++- 3 files changed, 93 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a126f82300..16636a75d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1982,6 +1982,20 @@ version = "1.0.13" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbfc4744c1b8f2a09adc0e55242f60b1af195d88596bd8700be74418c056c555" +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der", + "digest", + "elliptic-curve", + "rfc6979", + "signature 2.1.0", + "spki", +] + [[package]] name = "ed25519" version = "1.5.3" @@ -2023,9 +2037,9 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "elliptic-curve" -version = "0.13.5" +version = "0.13.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "968405c8fdc9b3bf4df0a6638858cc0b52462836ab6b1c87377785dd09cf1c0b" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" dependencies = [ "base16ct", "crypto-bigint", @@ -2034,7 +2048,10 @@ dependencies = [ "generic-array", "group", "hkdf", + "pem-rfc7468", + "pkcs8", "rand_core 0.6.4", + "sec1", "subtle", "zeroize", ] @@ -4955,6 +4972,7 @@ name = "omicron-workspace-hack" version = "0.1.0" dependencies = [ "anyhow", + "base16ct", "bit-set", "bit-vec", "bitflags 1.3.2", @@ -4973,10 +4991,13 @@ dependencies = [ "crossbeam-utils", "crossterm", "crypto-common", + "der", "diesel", "digest", "either", + "elliptic-curve", "errno", + "ff", "flate2", "futures", "futures-channel", @@ -4988,8 +5009,10 @@ dependencies = [ "gateway-messages", "generic-array", "getrandom 0.2.10", + "group", "hashbrown 0.13.2", "hex", + "hmac", "hyper", "hyper-rustls", "indexmap 2.1.0", @@ -5009,6 +5032,7 @@ dependencies = [ "num-traits", "once_cell", "openapiv3", + "pem-rfc7468", "petgraph", "postgres-types", "ppv-lite86", @@ -5027,7 +5051,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "signature 2.1.0", "similar", "slog", "snafu", @@ -5446,6 +5469,18 @@ dependencies = [ "uuid", ] +[[package]] +name = "p256" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2", +] + [[package]] name = "packed_struct" version = "0.10.1" @@ -6022,6 +6057,15 @@ dependencies = [ "syn 2.0.32", ] +[[package]] +name = "primeorder" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" +dependencies = [ + "elliptic-curve", +] + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -6638,6 +6682,16 @@ dependencies = [ "quick-error", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac", + "subtle", +] + [[package]] name = "ring" version = "0.16.20" @@ -6755,9 +6809,9 @@ dependencies = [ [[package]] name = "russh" -version = "0.39.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7878311587d0353a854d5be954fbe68bdf6e77873933b484d1e45db12bb2f8cf" +checksum = "98bee7ebcce06bfc40a46b9d90205c6132d899bb9095c5ce9da3cdad8ec0833d" dependencies = [ "aes", "aes-gcm", @@ -6800,9 +6854,9 @@ dependencies = [ [[package]] name = "russh-keys" -version = "0.38.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"557ab9190022dff78116ebed5e391abbd3f424b06cd643dfe262346ab91ed8c9" +checksum = "3b5d5a656fe1c3024d829d054cd8c0c78dc831e4b2d4b08360569c3b38f3017f" dependencies = [ "aes", "async-trait", @@ -6822,11 +6876,13 @@ dependencies = [ "md5", "num-bigint", "num-integer", + "p256", "pbkdf2 0.11.0", "rand 0.7.3", "rand_core 0.6.4", "russh-cryptovec", "serde", + "sha1", "sha2", "thiserror", "tokio", @@ -7108,6 +7164,20 @@ dependencies = [ "untrusted 0.7.1", ] +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + [[package]] name = "secrecy" version = "0.8.0" diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index e78a8792d3..66e1a58a2c 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -15,8 +15,8 @@ omicron-test-utils.workspace = true oxide-client.workspace = true rand.workspace = true reqwest.workspace = true -russh = "0.39.0" -russh-keys = "0.38.0" +russh = "0.40.0" +russh-keys = "0.40.0" tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true trust-dns-resolver.workspace = true diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index fe7c3bdc81..098509291a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -15,6 +15,7 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] anyhow = { version = "1.0.75", features = ["backtrace"] } +base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } @@ -33,9 +34,12 @@ crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } +der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.9.0" } +elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } +ff = { version = "0.13.0", default-features = false, features = ["alloc"] } flate2 = { version = "1.0.28" } futures = { version = "0.3.29" } futures-channel = { version = "0.3.29", features = ["sink"] } @@ -47,8 +51,10 @@ futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] } gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } +group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } +hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } indexmap = { version = 
"2.1.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } @@ -65,6 +71,7 @@ num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = "0.2.16", features = ["i128", "libm"] } openapiv3 = { version = "2.0.0-rc.1", default-features = false, features = ["skip_serializing_defaults"] } +pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } @@ -82,7 +89,6 @@ semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value"] } sha2 = { version = "0.10.8", features = ["oid"] } -signature = { version = "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2.2.1", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } @@ -110,6 +116,7 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla [build-dependencies] anyhow = { version = "1.0.75", features = ["backtrace"] } +base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } @@ -128,9 +135,12 @@ crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } +der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.9.0" } +elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } +ff = { version = "0.13.0", default-features = false, features = ["alloc"] } flate2 = { version = "1.0.28" } futures = { version = "0.3.29" } futures-channel = { version = "0.3.29", features = ["sink"] } @@ -142,8 +152,10 @@ futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] } gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } +group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } +hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } indexmap = { version = "2.1.0", 
features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } @@ -160,6 +172,7 @@ num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = "0.2.16", features = ["i128", "libm"] } openapiv3 = { version = "2.0.0-rc.1", default-features = false, features = ["skip_serializing_defaults"] } +pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } @@ -177,7 +190,6 @@ semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value"] } sha2 = { version = "0.10.8", features = ["oid"] } -signature = { version = "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2.2.1", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } From 75cdeebdd1b0f53447e4f91b8d202d1e2ae8efa2 Mon Sep 17 00:00:00 2001 From: Nils Nieuwejaar Date: Wed, 6 Dec 2023 22:05:11 -0500 Subject: [PATCH 063/186] bump dendrite, softnpu, and sidecar-lite versions to get full multipath (#4637) --- package-manifest.toml | 12 ++++++------ tools/ci_download_softnpu_machinery | 2 +- tools/create_virtual_hardware.sh | 4 ++-- tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index 49f202089c..bd60fe9e93 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" -source.sha256 = "06b5eeedaebf30e96a5c5e932e08034c90947af7a54e9bc04d57d6807013ade9" +source.commit = "45e05b2a90203d84510e0c8e902d9449b09ffd9b" +source.sha256 = "b14e73c8091a004472f9825b9b81b2c685bc5a48801704380a80481499060ad9" output.type = "zone" output.intermediate_only = true @@ -501,8 +501,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" -source.sha256 = "51be0b0342bc7cdf927797af45af3bc82861bb8efb174d50958cb16b5620c51d" +source.commit = "45e05b2a90203d84510e0c8e902d9449b09ffd9b" +source.sha256 = "06575bea6173d16f6d206b580956ae2cdc72c65df2eb2f40dac01468ab49e336" output.type = "zone" output.intermediate_only = true @@ -519,8 +519,8 @@ only_for_targets.image = "standard" # 2. 
Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" -source.sha256 = "9afb24cdae27755eaf86a856268686bb641048b5d450dae858cf47b9daaa46ed" +source.commit = "45e05b2a90203d84510e0c8e902d9449b09ffd9b" +source.sha256 = "db2a398426fe59bd911eed91a3db7731a7a4d57e31dd357d89828d04b0891e2a" output.type = "zone" output.intermediate_only = true diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index 3efb030063..e147238673 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -15,7 +15,7 @@ OUT_DIR="out/npuzone" # Pinned commit for softnpu ASIC simulator SOFTNPU_REPO="softnpu" -SOFTNPU_COMMIT="dec63e67156fe6e958991bbfa090629868115ab5" +SOFTNPU_COMMIT="dbab082dfa89da5db5ca2325c257089d2f130092" # This is the softnpu ASIC simulator echo "fetching npuzone" diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index 884d356222..7721fb1c0f 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -63,8 +63,8 @@ function ensure_softnpu_zone { --omicron-zone \ --ports sc0_0,tfportrear0_0 \ --ports sc0_1,tfportqsfp0_0 \ - --sidecar-lite-commit f0585a29fb0285f7a1220c1118856b0e5c1f75c5 \ - --softnpu-commit dec63e67156fe6e958991bbfa090629868115ab5 + --sidecar-lite-commit 45ed98fea5824feb4d42f45bbf218e597dc9fc58 \ + --softnpu-commit dbab082dfa89da5db5ca2325c257089d2f130092 } "$SOURCE_DIR"/scrimlet/softnpu-init.sh success "softnpu zone exists" diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index b6dc45a8d0..c2afe5ca87 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="1b15e62b04044ef2b15c82d8dcef03f6fc24b3d8" +COMMIT="45e05b2a90203d84510e0c8e902d9449b09ffd9b" SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 95f04db9e8..2b4f0e7555 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="06b5eeedaebf30e96a5c5e932e08034c90947af7a54e9bc04d57d6807013ade9" -CIDL_SHA256_LINUX_DPD="99a800cbd5739245154831004892d47be5a871e37c536ec3009911ddb02fdb16" -CIDL_SHA256_LINUX_SWADM="e92bfc071f3944523a2e69b13ee877a4fd87cb8a9a78011b4aa8f40218347e25" +CIDL_SHA256_ILLUMOS="b14e73c8091a004472f9825b9b81b2c685bc5a48801704380a80481499060ad9" +CIDL_SHA256_LINUX_DPD="a0d92b5007826b119c68fdaef753e33b125740ec7b3e771bfa6b3aa8d9fcb8cc" +CIDL_SHA256_LINUX_SWADM="13387460db5b57e6ffad6c0b8877af32cc6d53fecc4a1a0910143c0446d39a38" From 9aec9baabb1e723c2d49e04694ee6f75ce514caa Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 03:11:09 +0000 Subject: [PATCH 064/186] Update Rust crate pem to v3 (#4625) Co-authored-by: Rain --- Cargo.lock | 19 +++++-------------- Cargo.toml | 2 +- test-utils/src/certificates.rs | 8 ++++---- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 16636a75d8..cabd48eba3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4099,7 +4099,7 @@ dependencies = [ "oso", "oximeter", "paste", - "pem 1.1.1", + "pem", "petgraph", "pq-sys", "rand 0.8.5", @@ -4707,7 +4707,7 @@ dependencies = [ "oximeter-producer", "parse-display", "paste", - "pem 1.1.1", + "pem", "petgraph", "pq-sys", "pretty_assertions", @@ -4951,7 
+4951,7 @@ dependencies = [ "libc", "omicron-common", "omicron-workspace-hack", - "pem 1.1.1", + "pem", "rcgen", "regex", "reqwest", @@ -5661,15 +5661,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" -[[package]] -name = "pem" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" -dependencies = [ - "base64 0.13.1", -] - [[package]] name = "pem" version = "3.0.2" @@ -6456,7 +6447,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52c4f3084aa3bc7dfbba4eff4fab2a54db4324965d8872ab933565e6fbd83bc6" dependencies = [ - "pem 3.0.2", + "pem", "ring 0.16.20", "time", "yasna", @@ -8759,7 +8750,7 @@ dependencies = [ "hex", "log", "olpc-cjson", - "pem 3.0.2", + "pem", "percent-encoding", "reqwest", "ring 0.16.20", diff --git a/Cargo.toml b/Cargo.toml index c0935aec6f..98c83eb6e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -283,7 +283,7 @@ parse-display = "0.8.2" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } paste = "1.0.14" percent-encoding = "2.3.1" -pem = "1.1" +pem = "3.0" petgraph = "0.6.4" postgres-protocol = "0.6.6" predicates = "3.0.4" diff --git a/test-utils/src/certificates.rs b/test-utils/src/certificates.rs index ab84f30b15..54da013e0c 100644 --- a/test-utils/src/certificates.rs +++ b/test-utils/src/certificates.rs @@ -79,10 +79,10 @@ impl CertificateChain { fn tls_cert_to_pem(certs: &Vec) -> String { let mut serialized_certs = String::new(); for cert in certs { - let encoded_cert = pem::encode(&pem::Pem { - tag: "CERTIFICATE".to_string(), - contents: cert.0.clone(), - }); + let encoded_cert = pem::encode(&pem::Pem::new( + "CERTIFICATE".to_string(), + cert.0.clone(), + )); serialized_certs.push_str(&encoded_cert); } From 5594eab8058bc67e6468fc5cba08a6d5898d0113 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 6 Dec 2023 21:49:41 -0800 Subject: [PATCH 065/186] fix phased startup of zones when sled agent starts up (#4588) --- common/src/ledger.rs | 6 +++--- sled-agent/src/services.rs | 9 +-------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/common/src/ledger.rs b/common/src/ledger.rs index c120ab953c..71d03fa8ee 100644 --- a/common/src/ledger.rs +++ b/common/src/ledger.rs @@ -7,7 +7,7 @@ use async_trait::async_trait; use camino::{Utf8Path, Utf8PathBuf}; use serde::{de::DeserializeOwned, Serialize}; -use slog::{error, info, warn, Logger}; +use slog::{debug, info, warn, Logger}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -88,7 +88,7 @@ impl Ledger { match T::read_from(log, &path).await { Ok(ledger) => ledgers.push(ledger), Err(err) => { - error!(log, "Failed to read ledger: {err}"; "path" => %path) + debug!(log, "Failed to read ledger: {err}"; "path" => %path) } } } @@ -184,7 +184,7 @@ pub trait Ledgerable: DeserializeOwned + Serialize + Send + Sync { err, }) } else { - warn!(log, "No ledger in {path}"); + info!(log, "No ledger in {path}"); Err(Error::NotFound) } } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index fb6de8d38a..651d2638e0 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -845,12 +845,7 @@ impl ServiceManager { None, omicron_zones_config.clone(), |z: &OmicronZoneConfig| { - matches!( - z.zone_type, - OmicronZoneType::InternalDns { .. } - | OmicronZoneType::BoundaryNtp { .. 
} - | OmicronZoneType::InternalNtp { .. } - ) + matches!(z.zone_type, OmicronZoneType::InternalDns { .. }) }, ) .await?; @@ -859,8 +854,6 @@ impl ServiceManager { // synchronization, which is a pre-requisite for the other services. We // keep `OmicronZoneType::InternalDns` because // `ensure_all_omicron_zones` is additive. - // TODO This looks like a duplicate of the block above -- why do we do - // this? let all_zones_request = self .ensure_all_omicron_zones( &mut existing_zones, From 5cc0ca4c97851ede2ff8bdd9dc2be51bd2b161d3 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 00:00:10 -0800 Subject: [PATCH 066/186] Update Rust crate serde_with to v3 (#4640) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 59 +++++++++++++++++++++++++----------------------------- Cargo.toml | 2 +- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cabd48eba3..10126eeb49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -394,12 +394,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" -[[package]] -name = "base64" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" - [[package]] name = "base64" version = "0.21.5" @@ -1914,7 +1908,7 @@ source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ff87a0175a6c dependencies = [ "async-stream", "async-trait", - "base64 0.21.5", + "base64", "bytes", "camino", "chrono", @@ -2092,7 +2086,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "base64 0.21.5", + "base64", "chrono", "http", "omicron-sled-agent", @@ -2506,7 +2500,7 @@ dependencies = [ name = "gateway-client" version = "0.1.0" dependencies = [ - "base64 0.21.5", + "base64", "chrono", "gateway-messages", "omicron-workspace-hack", @@ -2734,7 +2728,7 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270" dependencies = [ - "base64 0.21.5", + "base64", "bytes", "headers-core", "http", @@ -4061,7 +4055,7 @@ dependencies = [ "async-bb8-diesel", "async-trait", "authz-macros", - "base64 0.21.5", + "base64", "bb8", "camino", "camino-tempfile", @@ -4142,7 +4136,7 @@ name = "nexus-inventory" version = "0.1.0" dependencies = [ "anyhow", - "base64 0.21.5", + "base64", "chrono", "expectorate", "gateway-client", @@ -4222,7 +4216,7 @@ version = "0.1.0" dependencies = [ "anyhow", "api_identity", - "base64 0.21.5", + "base64", "chrono", "dns-service-client", "futures", @@ -4602,7 +4596,7 @@ name = "omicron-gateway" version = "0.1.0" dependencies = [ "anyhow", - "base64 0.21.5", + "base64", "clap 4.4.3", "dropshot", "expectorate", @@ -4646,7 +4640,7 @@ dependencies = [ "assert_matches", "async-bb8-diesel", "async-trait", - "base64 0.21.5", + "base64", "buf-list", "camino", "cancel-safe-futures", @@ -4856,7 +4850,7 @@ dependencies = [ "anyhow", "assert_matches", "async-trait", - "base64 0.21.5", + "base64", "bootstore", "bootstrap-agent-client", "bytes", @@ -5275,7 +5269,7 @@ name = "oxide-client" version = "0.1.0" dependencies = [ "anyhow", - "base64 0.21.5", + "base64", "chrono", "futures", "http", @@ -5667,7 +5661,7 @@ version = "3.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3163d2912b7c3b52d651a055f2c7eec9ba5cd22d26ef75b8dd3a59980b185923" dependencies = [ - "base64 0.21.5", + "base64", "serde", ] @@ -5924,7 +5918,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" dependencies = [ - "base64 0.21.5", + "base64", "byteorder", "bytes", "fallible-iterator", @@ -6170,7 +6164,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" dependencies = [ "async-trait", - "base64 0.21.5", + "base64", "futures", "progenitor", "rand 0.8.5", @@ -6192,7 +6186,7 @@ source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ea dependencies = [ "anyhow", "atty", - "base64 0.21.5", + "base64", "clap 4.4.3", "dropshot", "futures", @@ -6624,7 +6618,7 @@ version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" dependencies = [ - "base64 0.21.5", + "base64", "bytes", "encoding_rs", "futures-core", @@ -6718,7 +6712,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ - "base64 0.21.5", + "base64", "bitflags 2.4.0", "serde", "serde_derive", @@ -6887,7 +6881,7 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5885493fdf0be6cdff808d1533ce878d21cfa49c7086fa00c66355cd9141bfc" dependencies = [ - "base64 0.21.5", + "base64", "blake2b_simd", "constant_time_eq 0.3.0", "crossbeam-utils", @@ -6993,7 +6987,7 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" dependencies = [ - "base64 0.21.5", + "base64", ] [[package]] @@ -7064,7 +7058,7 @@ name = "samael" version = "0.0.10" source = "git+https://github.com/njaremko/samael?branch=master#52028e45d11ceb7114bf0c730a9971207e965602" dependencies = [ - "base64 0.21.5", + "base64", "bindgen", "chrono", "data-encoding", @@ -7390,14 +7384,15 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.3.3" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" +checksum = "64cd236ccc1b7a29e7e2739f27c0b2dd199804abc4290e32f59f3b68d6405c23" dependencies = [ - "base64 0.13.1", + "base64", "chrono", "hex", "indexmap 1.9.3", + "indexmap 2.1.0", "serde", "serde_json", "serde_with_macros", @@ -7406,9 +7401,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "2.3.3" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "881b6f881b17d13214e5d494c939ebab463d01264ce1811e9d4ac3a882e7695f" +checksum = "93634eb5f75a2323b16de4748022ac4297f9e76b6dced2be287a099f41b5e788" dependencies = [ "darling 0.20.3", "proc-macro2", @@ -9593,7 +9588,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "base64 0.21.5", + "base64", "bootstrap-agent-client", "buf-list", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 98c83eb6e7..4da39b7902 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -321,7 +321,7 @@ serde_json = "1.0.108" serde_path_to_error = "0.1.14" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" 
-serde_with = "2.3.3" +serde_with = "3.4.0" serial_test = "0.10" sha2 = "0.10.8" sha3 = "0.10.8" From b3c07a9269ed47cd1f1f2224a15fcc5feae3830a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 08:27:45 +0000 Subject: [PATCH 067/186] Update tough to 0.16 and ring to 0.17 (#4639) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> Co-authored-by: Rain --- Cargo.lock | 30 +++++++++++++++--------------- Cargo.toml | 4 ++-- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10126eeb49..39467bacf2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4712,7 +4712,7 @@ dependencies = [ "ref-cast", "regex", "reqwest", - "ring 0.16.20", + "ring 0.17.7", "rustls", "samael", "schemars", @@ -4803,7 +4803,7 @@ dependencies = [ "petgraph", "rayon", "reqwest", - "ring 0.16.20", + "ring 0.17.7", "semver 1.0.20", "serde", "sled-hardware", @@ -4949,7 +4949,7 @@ dependencies = [ "rcgen", "regex", "reqwest", - "ring 0.16.20", + "ring 0.17.7", "rustls", "slog", "subprocess", @@ -5038,7 +5038,7 @@ dependencies = [ "regex-automata 0.4.3", "regex-syntax 0.8.2", "reqwest", - "ring 0.16.20", + "ring 0.17.7", "rustix 0.38.25", "schemars", "semver 1.0.20", @@ -6694,9 +6694,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.5" +version = "0.17.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", "getrandom 0.2.10", @@ -6964,7 +6964,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" dependencies = [ "log", - "ring 0.17.5", + "ring 0.17.7", "rustls-webpki", "sct", ] @@ -6996,7 +6996,7 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.5", + "ring 0.17.7", "untrusted 0.9.0", ] @@ -8730,9 +8730,9 @@ checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" [[package]] name = "tough" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16dc5f42fc7ce7cb51eebc7a6ef91f4d69a6d41bb13f34a09674ec47e454d9b" +checksum = "49455926f64001de53ef047c2053e2f17440e412b8b1e958d4ad8a6008db7128" dependencies = [ "async-recursion", "async-trait", @@ -8748,7 +8748,7 @@ dependencies = [ "pem", "percent-encoding", "reqwest", - "ring 0.16.20", + "ring 0.17.7", "serde", "serde_json", "serde_plain", @@ -8757,7 +8757,7 @@ dependencies = [ "tokio", "tokio-util", "typed-path", - "untrusted 0.7.1", + "untrusted 0.9.0", "url", "walkdir", ] @@ -8958,7 +8958,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "rand 0.8.5", - "ring 0.16.20", + "ring 0.17.7", "serde", "serde_json", "serde_path_to_error", @@ -9003,9 +9003,9 @@ dependencies = [ [[package]] name = "typed-path" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb9d13b8242894ff21f9990082b90a6410a43dcc6029ac4227a1467853ba781" +checksum = "4a90726108dab678edab76459751e1cc7c597c3484a6384d6423191255fa641b" [[package]] name = "typenum" diff --git a/Cargo.toml b/Cargo.toml index 4da39b7902..48e6c02b60 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -305,7 +305,7 @@ ref-cast = "1.0" regex = "1.10.2" regress = "0.7.1" reqwest = { version = "0.11", default-features = false } -ring = "0.16" +ring = "0.17.7" rpassword = "7.3.1" rstest = "0.18.2" rustfmt-wrapper = "0.2" @@ -373,7 +373,7 @@ tokio-util = { version = "0.7.10", features = ["io", "io-util"] } toml = "0.8.8" toml_edit = "0.21.0" topological-sort = "0.2.2" -tough = { version = "0.15", features = [ "http" ] } +tough = { version = "0.16.0", features = [ "http" ] } trust-dns-client = "0.22" trust-dns-proto = "0.22" trust-dns-resolver = "0.22" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 098509291a..88ab9b2060 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -83,7 +83,7 @@ regex = { version = "1.10.2" } regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } -ring = { version = "0.16.20", features = ["std"] } +ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } @@ -184,7 +184,7 @@ regex = { version = "1.10.2" } regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } -ring = { version = "0.16.20", features = ["std"] } +ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } From 03823f1bee527b004a925951f478c657626a9377 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 7 Dec 2023 06:54:19 -0800 Subject: [PATCH 068/186] [omdb] typo fix in error message (#4631) Noticed this while investigating #4621: ``` warning: 16 collection errorwere s reported above ``` --- dev-tools/omdb/src/bin/omdb/db.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 85c55d4e61..08a783d8c8 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2477,8 +2477,8 @@ async fn cmd_db_inventory_collections_show( eprintln!( "warning: {} collection error{} {} reported above", nerrors, + if nerrors == 1 { "" } else { "s" }, if nerrors == 1 { "was" } else { "were" }, - if nerrors == 1 { "" } else { "s" } ); } From 9660fa790ea69bd5046909c590acb826f7d426e0 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 7 Dec 2023 13:13:21 -0600 Subject: [PATCH 069/186] Bump web console (#4646) ### User-facing changes * [1802c285](https://github.com/oxidecomputer/console/commit/1802c285) oxidecomputer/console#1839 * [6ae6beeb](https://github.com/oxidecomputer/console/commit/6ae6beeb) oxidecomputer/console#1829 * [a0bf47aa](https://github.com/oxidecomputer/console/commit/a0bf47aa) oxidecomputer/console#1836 * [9e82f9ab](https://github.com/oxidecomputer/console/commit/9e82f9ab) oxidecomputer/console#1811 * 
[5a6dcea7](https://github.com/oxidecomputer/console/commit/5a6dcea7) oxidecomputer/console#1822 ### All changes https://github.com/oxidecomputer/console/compare/ae8218df...1802c285 * [1802c285](https://github.com/oxidecomputer/console/commit/1802c285) oxidecomputer/console#1839 * [ce09b547](https://github.com/oxidecomputer/console/commit/ce09b547) bump postcss-pseudo-classes for fake vuln * [e09b803b](https://github.com/oxidecomputer/console/commit/e09b803b) might as well get vitest 1.0 in there too * [83dd73ee](https://github.com/oxidecomputer/console/commit/83dd73ee) minor bumps for react router, msw, vite, tailwind, recharts * [6ae6beeb](https://github.com/oxidecomputer/console/commit/6ae6beeb) oxidecomputer/console#1829 * [a0bf47aa](https://github.com/oxidecomputer/console/commit/a0bf47aa) oxidecomputer/console#1836 * [6c9420ad](https://github.com/oxidecomputer/console/commit/6c9420ad) oxidecomputer/console#1835 * [64e97b01](https://github.com/oxidecomputer/console/commit/64e97b01) api-diff also takes a commit * [22bef0bb](https://github.com/oxidecomputer/console/commit/22bef0bb) oxidecomputer/console#1833 * [2fe50f51](https://github.com/oxidecomputer/console/commit/2fe50f51) oxidecomputer/console#1810 * [faadb6d3](https://github.com/oxidecomputer/console/commit/faadb6d3) oxidecomputer/console#1832 * [9e82f9ab](https://github.com/oxidecomputer/console/commit/9e82f9ab) oxidecomputer/console#1811 * [5e11fd83](https://github.com/oxidecomputer/console/commit/5e11fd83) tweak api-diff * [dae20577](https://github.com/oxidecomputer/console/commit/dae20577) oxidecomputer/console#1827 * [ed0ef62e](https://github.com/oxidecomputer/console/commit/ed0ef62e) minor tweaks to api-diff script * [1c790d27](https://github.com/oxidecomputer/console/commit/1c790d27) oxidecomputer/console#1819 * [97be7724](https://github.com/oxidecomputer/console/commit/97be7724) oxidecomputer/console#1826 * [87f4d8b8](https://github.com/oxidecomputer/console/commit/87f4d8b8) oxidecomputer/console#1814 * [65ae1212](https://github.com/oxidecomputer/console/commit/65ae1212) oxidecomputer/console#1820 * [5a6dcea7](https://github.com/oxidecomputer/console/commit/5a6dcea7) oxidecomputer/console#1822 * [4e1bbe13](https://github.com/oxidecomputer/console/commit/4e1bbe13) oxidecomputer/console#1821 * [17408f64](https://github.com/oxidecomputer/console/commit/17408f64) oxidecomputer/console#1813 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 811620e9e7..725bda0ee9 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="ae8218df707360a902133f4a96b48a3b5a62a09e" -SHA2="ae35b991d3ff835a59b59126298790cb7431a282b25ba4add4e7fb6ea6b98989" +COMMIT="1802c2859f40712017ab89e72740e39bfd59320b" +SHA2="34768a895f187a6ed263c0050c42084f3907c331b547362871c2ce330e9d08d1" From 77ad3584c632b8b3b2f5aef04cd0fcf901fd75f6 Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 7 Dec 2023 12:31:25 -0800 Subject: [PATCH 070/186] [wicketd] be more explicit about ordering in test_update_races (#4641) The switch to a multithreaded executor for this test in #4477 appears to have exposed a race within this test (quite ironic given the name!) Fix the race by being explicit about the ordering of events. Fixes #4590. 
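
The ordering the fix enforces can be sketched with a pair of nested oneshot
channels, independent of the wicketd types (a minimal illustration under
assumed names, not the patch's exact code): the test sends an inner `Sender`
through the outer channel to unblock the fake step, and the driver resolves
that inner sender only once the engine has finished, so the test cannot
inspect the event buffer early.

    use tokio::sync::oneshot;

    #[tokio::main]
    async fn main() {
        // Outer channel: the test uses it to unblock the fake update step.
        let (start_tx, start_rx) = oneshot::channel::<oneshot::Sender<()>>();

        // Stand-in for the fake update driver: wait to be unblocked, run
        // the step, then signal completion over the inner channel.
        let driver = tokio::spawn(async move {
            let done_tx = start_rx.await.expect("test keeps the sender open");
            // ... fake step runs here ...
            let _ = done_tx.send(());
        });

        // Test side: unblock the update, then wait for the explicit
        // completion signal before asserting on any state.
        let (done_tx, done_rx) = oneshot::channel();
        start_tx.send(done_tx).map_err(|_| "driver dropped receiver").unwrap();
        done_rx.await.expect("driver signals completion");
        driver.await.unwrap();
    }
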
--- wicketd/src/update_tracker.rs | 61 ++++++++++++++++------ wicketd/tests/integration_tests/updates.rs | 11 ++-- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index a86ea35cc3..7faaa08a28 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -181,14 +181,19 @@ impl UpdateTracker { } /// Starts a fake update that doesn't perform any steps, but simply waits - /// for a watch receiver to resolve. + /// for a receiver to resolve. + /// + /// The inner sender will resolve once the update is completed. #[doc(hidden)] pub async fn start_fake_update( &self, sps: BTreeSet, - watch_receiver: watch::Receiver<()>, + fake_step_receiver: oneshot::Receiver>, ) -> Result<(), Vec> { - let imp = FakeUpdateDriver { watch_receiver, log: self.log.clone() }; + let imp = FakeUpdateDriver { + fake_step_receiver: Some(fake_step_receiver), + log: self.log.clone(), + }; self.start_impl(sps, Some(imp)).await } @@ -515,7 +520,7 @@ impl<'tr> SpawnUpdateDriver for RealSpawnUpdateDriver<'tr> { /// waits for a [`watch::Receiver`] to resolve. #[derive(Debug)] struct FakeUpdateDriver { - watch_receiver: watch::Receiver<()>, + fake_step_receiver: Option>>, log: Logger, } @@ -539,22 +544,24 @@ impl SpawnUpdateDriver for FakeUpdateDriver { let engine = UpdateEngine::new(&log, sender); let abort_handle = engine.abort_handle(); - let mut watch_receiver = self.watch_receiver.clone(); + let fake_step_receiver = self + .fake_step_receiver + .take() + .expect("fake step receiver is only taken once"); let task = tokio::spawn(async move { // The step component and ID have been chosen arbitrarily here -- // they aren't important. - engine + let final_sender_handle = engine .new_step( UpdateComponent::Host, UpdateStepId::RunningInstallinator, "Fake step that waits for receiver to resolve", move |_cx| async move { - // This will resolve as soon as the watch sender - // (typically a test) sends a value over the watch - // channel. - _ = watch_receiver.changed().await; - StepSuccess::new(()).into() + // This will resolve as soon as the sender (typically a + // test) sends a value over the channel. + let ret = fake_step_receiver.await; + StepSuccess::new(ret).into() }, ) .register(); @@ -566,16 +573,36 @@ impl SpawnUpdateDriver for FakeUpdateDriver { } }); - match engine.execute().await { - Ok(_cx) => (), - Err(err) => { - error!(log, "update failed"; "err" => %err); - } - } + let engine_res = engine.execute().await; // Wait for all events to be received and written to the event // buffer. event_receiving_task.await.expect("event receiving task panicked"); + + // Finally, notify the receiving end of the inner sender: this + // indicates that the update is done. + match engine_res { + Ok(cx) => { + info!(log, "fake update completed successfully"); + let final_sender = + final_sender_handle.into_value(cx.token()).await; + match final_sender { + Ok(sender) => { + if let Err(_) = sender.send(()) { + warn!(log, "failed to send final value"); + } + } + Err(error) => { + // This occurs if the fake_step_receiver's sender + // side was closed. Nothing to do here but warn. 
+ warn!(log, "failed to get final sender: {}", error); + } + } + } + Err(error) => { + error!(log, "fake update failed: {}", error); + } + } }); SpUpdateData { task, abort_handle, event_buffer } diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index 52bf1d1283..611d81c7f5 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -17,7 +17,7 @@ use omicron_common::{ api::internal::nexus::KnownArtifactKind, update::{ArtifactHashId, ArtifactKind}, }; -use tokio::sync::watch; +use tokio::sync::oneshot; use update_engine::NestedError; use uuid::Uuid; use wicket::OutputKind; @@ -436,7 +436,7 @@ async fn test_update_races() { }; let sps: BTreeSet<_> = vec![sp].into_iter().collect(); - let (sender, receiver) = watch::channel(()); + let (sender, receiver) = oneshot::channel(); wicketd_testctx .server .update_tracker @@ -455,7 +455,7 @@ async fn test_update_races() { // Also try starting another fake update, which should fail -- we don't let updates be started // if there's current update state. { - let (_, receiver) = watch::channel(()); + let (_, receiver) = oneshot::channel(); let err = wicketd_testctx .server .update_tracker @@ -470,9 +470,10 @@ async fn test_update_races() { } // Unblock the update, letting it run to completion. - sender.send(()).expect("receiver kept open by update engine"); + let (final_sender, final_receiver) = oneshot::channel(); + sender.send(final_sender).expect("receiver kept open by update engine"); + final_receiver.await.expect("update engine completed successfully"); - // Ensure that the event buffer indicates completion. let event_buffer = wicketd_testctx .wicketd_client .get_update_sp(&SpType::Sled, 0) From 2988235b260b1ac2b6bd169c38adea2138930b6a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 12:32:31 -0800 Subject: [PATCH 071/186] Update Rust crate once_cell to 1.19.0 (#4644) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 16 ++++++++-------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 39467bacf2..52a16b414e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5102,9 +5102,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" diff --git a/Cargo.toml b/Cargo.toml index 48e6c02b60..2bdd8522eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -261,7 +261,7 @@ omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.9.1" oxide-client = { path = "clients/oxide-client" } oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "24ceba1969269e4d81bda83d8968d7d7f713c46b", features = [ "api", "std" ] } -once_cell = "1.18.0" +once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0-rc.1" # must match samael's crate! 
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 88ab9b2060..ce65ddf062 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -220,14 +220,14 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] @@ -235,7 +235,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] @@ -243,7 +243,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] @@ -251,7 +251,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] @@ -259,7 +259,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] @@ -267,7 +267,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", 
features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } @@ -277,7 +277,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } -once_cell = { version = "1.18.0", features = ["unstable"] } +once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } From a11a838defcffe2336acf56e1506adaa47ecfe5d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 14:19:40 -0800 Subject: [PATCH 072/186] Update actions/setup-node action to v4 (#4643) --- .github/workflows/validate-openapi-spec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml index 39c6c1debb..10f1dd5b46 100644 --- a/.github/workflows/validate-openapi-spec.yml +++ b/.github/workflows/validate-openapi-spec.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - - uses: actions/setup-node@1a4442cacd436585916779262731d5b162bc6ec7 # v3.8.2 + - uses: actions/setup-node@8f152de45cc393bb48ce5d89d36b731f54556e65 # v4.0.0 with: node-version: '18' - name: Install our tools From 8fa550c2c95b5fd3443af7e2e5837c2178fdb8c8 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 7 Dec 2023 18:10:46 -0500 Subject: [PATCH 073/186] Remove URL based image creation and import (#2898) Remove both params::ImageSource::Url and params::ImportBlocksFromUrl (along with associated HTTP endpoint): do not allow customers to create an image from a URL, that was for development purposes only. Now that Nexus supports importing blocks via the Pantry this is no longer required. 
Closes #2893 Co-authored-by: iliana etaoin --- .github/buildomat/jobs/deploy.sh | 51 ++- Cargo.lock | 4 + end-to-end-tests/Cargo.toml | 4 + end-to-end-tests/src/bin/bootstrap.rs | 68 ++- end-to-end-tests/src/helpers/ctx.rs | 155 ++++--- end-to-end-tests/src/instance_launch.rs | 34 +- nexus/db-model/src/image.rs | 1 - nexus/src/app/disk.rs | 26 -- nexus/src/app/image.rs | 116 ----- nexus/src/app/sagas/import_blocks_from_url.rs | 413 ------------------ nexus/src/app/sagas/mod.rs | 2 - nexus/src/external_api/http_entrypoints.rs | 34 -- nexus/tests/integration_tests/endpoints.rs | 21 +- nexus/tests/integration_tests/images.rs | 315 ++----------- nexus/tests/integration_tests/instances.rs | 19 +- nexus/tests/integration_tests/pantry.rs | 46 -- nexus/tests/integration_tests/snapshots.rs | 53 +-- .../integration_tests/volume_management.rs | 18 +- nexus/tests/output/nexus_tags.txt | 1 - nexus/types/src/external_api/params.rs | 15 - nexus/types/src/external_api/views.rs | 3 - openapi/nexus.json | 119 ----- 22 files changed, 242 insertions(+), 1276 deletions(-) delete mode 100644 nexus/src/app/sagas/import_blocks_from_url.rs diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 3c4b3d88c8..f4f1e0a999 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -281,19 +281,15 @@ rmdir pkg E2E_TLS_CERT="/opt/oxide/sled-agent/pkg/initial-tls-cert.pem" # -# Image-related tests use images served by catacomb. The lab network is -# IPv4-only; the propolis zones are IPv6-only. These steps set up tcpproxy -# configured to proxy to catacomb via port 54321 in the global zone. +# Download the Oxide CLI and images from catacomb. # pfexec mkdir -p /usr/oxide -pfexec rm -f /usr/oxide/tcpproxy -pfexec curl -sSfL -o /usr/oxide/tcpproxy \ - http://catacomb.eng.oxide.computer:12346/tcpproxy -pfexec chmod +x /usr/oxide/tcpproxy -pfexec rm -f /var/svc/manifest/site/tcpproxy.xml -pfexec curl -sSfL -o /var/svc/manifest/site/tcpproxy.xml \ - http://catacomb.eng.oxide.computer:12346/tcpproxy.xml -pfexec svccfg import /var/svc/manifest/site/tcpproxy.xml +pfexec curl -sSfL -o /usr/oxide/oxide \ + http://catacomb.eng.oxide.computer:12346/oxide-v0.1.0 +pfexec chmod +x /usr/oxide/oxide + +curl -sSfL -o debian-11-genericcloud-amd64.raw \ + http://catacomb.eng.oxide.computer:12346/debian-11-genericcloud-amd64.raw # # The lab-netdev target is a ramdisk system that is always cleared @@ -336,7 +332,38 @@ echo "Waited for chrony: ${retry}s" export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END -./tests/bootstrap +eval "$(./tests/bootstrap)" +export OXIDE_HOST OXIDE_TOKEN + +# +# The Nexus resolved in `$OXIDE_RESOLVE` is not necessarily the same one that we +# successfully talked to in bootstrap, so wait a bit for it to fully come online. +# +retry=0 +while ! 
curl -sSf "$OXIDE_HOST/v1/ping" --resolve "$OXIDE_RESOLVE" --cacert "$E2E_TLS_CERT"; do + if [[ $retry -gt 60 ]]; then + echo "$OXIDE_RESOLVE failed to come up after 60 seconds" + exit 1 + fi + sleep 1 + retry=$((retry + 1)) +done + +/usr/oxide/oxide --resolve "$OXIDE_RESOLVE" --cacert "$E2E_TLS_CERT" \ + project create --name images --description "some images" +/usr/oxide/oxide --resolve "$OXIDE_RESOLVE" --cacert "$E2E_TLS_CERT" \ + disk import \ + --path debian-11-genericcloud-amd64.raw \ + --disk debian11-boot \ + --project images \ + --description "debian 11 cloud image from distros" \ + --snapshot debian11-snapshot \ + --image debian11 \ + --image-description "debian 11 original base image" \ + --image-os debian \ + --image-version "11" +/usr/oxide/oxide --resolve "$OXIDE_RESOLVE" --cacert "$E2E_TLS_CERT" \ + image promote --project images --image debian11 rm ./tests/bootstrap for test_bin in tests/*; do diff --git a/Cargo.lock b/Cargo.lock index 52a16b414e..ed988f4b14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2089,6 +2089,7 @@ dependencies = [ "base64", "chrono", "http", + "hyper", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", @@ -2097,9 +2098,12 @@ dependencies = [ "reqwest", "russh", "russh-keys", + "serde", + "serde_json", "tokio", "toml 0.8.8", "trust-dns-resolver", + "uuid", ] [[package]] diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 66e1a58a2c..8a1f91eee8 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -10,6 +10,7 @@ async-trait.workspace = true base64.workspace = true chrono.workspace = true http.workspace = true +hyper.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true oxide-client.workspace = true @@ -17,7 +18,10 @@ rand.workspace = true reqwest.workspace = true russh = "0.40.0" russh-keys = "0.40.0" +serde.workspace = true +serde_json.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true trust-dns-resolver.workspace = true +uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/end-to-end-tests/src/bin/bootstrap.rs b/end-to-end-tests/src/bin/bootstrap.rs index c9001937db..83a37b8c21 100644 --- a/end-to-end-tests/src/bin/bootstrap.rs +++ b/end-to-end-tests/src/bin/bootstrap.rs @@ -1,18 +1,23 @@ use anyhow::Result; -use end_to_end_tests::helpers::ctx::{build_client, Context}; +use end_to_end_tests::helpers::ctx::{ClientParams, Context}; use end_to_end_tests::helpers::{generate_name, get_system_ip_pool}; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ - ByteCount, DiskCreate, DiskSource, IpRange, Ipv4Range, + ByteCount, DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify, + DiskCreate, DiskSource, IpRange, Ipv4Range, }; use oxide_client::{ - ClientDisksExt, ClientProjectsExt, ClientSystemNetworkingExt, + ClientDisksExt, ClientHiddenExt, ClientProjectsExt, + ClientSystemNetworkingExt, }; +use serde::{de::DeserializeOwned, Deserialize}; use std::time::Duration; +use uuid::Uuid; #[tokio::main] async fn main() -> Result<()> { - let client = build_client().await?; + let params = ClientParams::new()?; + let client = params.build_client().await?; // ===== ENSURE NEXUS IS UP ===== // eprintln!("waiting for nexus to come up..."); @@ -71,8 +76,61 @@ async fn main() -> Result<()> { .disk(disk_name) .send() .await?; - ctx.cleanup().await?; + // ===== PRINT CLI ENVIRONMENT ===== // + let client_id = Uuid::new_v4(); + let 
DeviceAuthResponse { device_code, user_code } = + deserialize_byte_stream( + ctx.client + .device_auth_request() + .body(DeviceAuthRequest { client_id }) + .send() + .await?, + ) + .await?; + ctx.client + .device_auth_confirm() + .body(DeviceAuthVerify { user_code }) + .send() + .await?; + let DeviceAccessTokenGrant { access_token } = deserialize_byte_stream( + ctx.client + .device_access_token() + .body(DeviceAccessTokenRequest { + client_id, + device_code, + grant_type: "urn:ietf:params:oauth:grant-type:device_code" + .to_string(), + }) + .send() + .await?, + ) + .await?; + + println!("OXIDE_HOST={}", params.base_url()); + println!("OXIDE_RESOLVE={}", params.resolve_nexus().await?); + println!("OXIDE_TOKEN={}", access_token); + + ctx.cleanup().await?; eprintln!("let's roll."); Ok(()) } + +async fn deserialize_byte_stream( + response: oxide_client::ResponseValue, +) -> Result { + let body = hyper::Body::wrap_stream(response.into_inner_stream()); + let bytes = hyper::body::to_bytes(body).await?; + Ok(serde_json::from_slice(&bytes)?) +} + +#[derive(Deserialize)] +struct DeviceAuthResponse { + device_code: String, + user_code: String, +} + +#[derive(Deserialize)] +struct DeviceAccessTokenGrant { + access_token: String, +} diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index 1c95703807..2c66bd4724 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -5,7 +5,8 @@ use omicron_sled_agent::rack_setup::config::SetupServiceConfig; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate}; use oxide_client::CustomDnsResolver; -use oxide_client::{Client, ClientProjectsExt, ClientVpcsExt}; +use oxide_client::{Client, ClientImagesExt, ClientProjectsExt, ClientVpcsExt}; +use reqwest::dns::Resolve; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::Url; use std::net::IpAddr; @@ -13,6 +14,7 @@ use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; use trust_dns_resolver::error::ResolveErrorKind; +use uuid::Uuid; const RSS_CONFIG_STR: &str = include_str!(concat!( env!("CARGO_MANIFEST_DIR"), @@ -30,7 +32,7 @@ pub struct Context { impl Context { pub async fn new() -> Result { - Context::from_client(build_client().await?).await + Context::from_client(ClientParams::new()?.build_client().await?).await } pub async fn from_client(client: Client) -> Result { @@ -48,6 +50,10 @@ impl Context { Ok(Context { client, project_name }) } + pub async fn get_silo_image_id(&self, name: &str) -> Result { + Ok(self.client.image_view().image(name).send().await?.id) + } + pub async fn cleanup(self) -> Result<()> { self.client .vpc_subnet_delete() @@ -179,6 +185,27 @@ impl ClientParams { format!("{}://{}", self.proto, self.nexus_dns_name) } + pub async fn resolve_nexus(&self) -> Result { + let address = self + .resolver + .resolve(self.nexus_dns_name.parse()?) + .await + .map_err(anyhow::Error::msg)? + .next() + .with_context(|| { + format!( + "{} did not resolve to any addresses", + self.nexus_dns_name + ) + })?; + let port = match self.proto { + "http" => 80, + "https" => 443, + _ => unreachable!(), + }; + Ok(format!("{}:{}:{}", self.nexus_dns_name, port, address.ip())) + } + pub fn reqwest_builder(&self) -> reqwest::ClientBuilder { let mut builder = reqwest::ClientBuilder::new().dns_resolver(self.resolver.clone()); @@ -189,77 +216,77 @@ impl ClientParams { builder } -} -pub async fn build_client() -> Result { - // Prepare to make a login request. 
- let client_params = ClientParams::new()?; - let config = &client_params.rss_config; - let base_url = client_params.base_url(); - let silo_name = config.recovery_silo.silo_name.as_str(); - let login_url = format!("{}/v1/login/{}/local", base_url, silo_name); - let username: oxide_client::types::UserId = - config.recovery_silo.user_name.as_str().parse().map_err(|s| { - anyhow!("parsing configured recovery user name: {:?}", s) - })?; - // See the comment in the config file about this password. - let password: oxide_client::types::Password = "oxide".parse().unwrap(); + pub async fn build_client(&self) -> Result { + // Prepare to make a login request. + let config = &self.rss_config; + let base_url = self.base_url(); + let silo_name = config.recovery_silo.silo_name.as_str(); + let login_url = format!("{}/v1/login/{}/local", base_url, silo_name); + let username: oxide_client::types::UserId = + config.recovery_silo.user_name.as_str().parse().map_err(|s| { + anyhow!("parsing configured recovery user name: {:?}", s) + })?; + // See the comment in the config file about this password. + let password: oxide_client::types::Password = "oxide".parse().unwrap(); - // By the time we get here, Nexus might not be up yet. It may not have - // published its names to external DNS, and even if it has, it may not have - // opened its external listening socket. So we have to retry a bit until we - // succeed. - let session_token = wait_for_condition( - || async { - // Use a raw reqwest client because it's not clear that Progenitor - // is intended to support endpoints that return 300-level response - // codes. See progenitor#451. - eprintln!("{}: attempting to log into API", Utc::now()); + // By the time we get here, Nexus might not be up yet. It may not have + // published its names to external DNS, and even if it has, it may not have + // opened its external listening socket. So we have to retry a bit until we + // succeed. + let session_token = wait_for_condition( + || async { + // Use a raw reqwest client because it's not clear that Progenitor + // is intended to support endpoints that return 300-level response + // codes. See progenitor#451. 
+ eprintln!("{}: attempting to log into API", Utc::now()); - let builder = client_params - .reqwest_builder() - .connect_timeout(Duration::from_secs(15)) - .timeout(Duration::from_secs(60)); + let builder = self + .reqwest_builder() + .connect_timeout(Duration::from_secs(15)) + .timeout(Duration::from_secs(60)); - oxide_client::login( - builder, - &login_url, - username.clone(), - password.clone(), - ) - .await - .map_err(|e| { - eprintln!("{}: login failed: {:#}", Utc::now(), e); - if let oxide_client::LoginError::RequestError(e) = &e { - if e.is_connect() { - return CondCheckError::NotYet; + oxide_client::login( + builder, + &login_url, + username.clone(), + password.clone(), + ) + .await + .map_err(|e| { + eprintln!("{}: login failed: {:#}", Utc::now(), e); + if let oxide_client::LoginError::RequestError(e) = &e { + if e.is_connect() { + return CondCheckError::NotYet; + } } - } - CondCheckError::Failed(e) - }) - }, - &Duration::from_secs(1), - &Duration::from_secs(600), - ) - .await - .context("logging in")?; + CondCheckError::Failed(e) + }) + }, + &Duration::from_secs(1), + &Duration::from_secs(600), + ) + .await + .context("logging in")?; - eprintln!("{}: login succeeded", Utc::now()); + eprintln!("{}: login succeeded", Utc::now()); - let mut headers = HeaderMap::new(); - headers.insert( - http::header::COOKIE, - HeaderValue::from_str(&format!("session={}", session_token)).unwrap(), - ); + let mut headers = HeaderMap::new(); + headers.insert( + http::header::COOKIE, + HeaderValue::from_str(&format!("session={}", session_token)) + .unwrap(), + ); - let reqwest_client = client_params - .reqwest_builder() - .default_headers(headers) - .connect_timeout(Duration::from_secs(15)) - .timeout(Duration::from_secs(60)) - .build()?; - Ok(Client::new_with_client(&base_url, reqwest_client)) + let reqwest_client = self + .reqwest_builder() + .default_headers(headers) + .connect_timeout(Duration::from_secs(15)) + .timeout(Duration::from_secs(60)) + .build()?; + Ok(Client::new_with_client(&base_url, reqwest_client)) + } } async fn wait_for_records( diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index 30ccd0d4a3..b3d1406070 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -5,13 +5,11 @@ use anyhow::{ensure, Context as _, Result}; use async_trait::async_trait; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ - ByteCount, DiskCreate, DiskSource, ExternalIpCreate, ImageCreate, - ImageSource, InstanceCpuCount, InstanceCreate, InstanceDiskAttachment, - InstanceNetworkInterfaceAttachment, SshKeyCreate, -}; -use oxide_client::{ - ClientDisksExt, ClientImagesExt, ClientInstancesExt, ClientSessionExt, + ByteCount, DiskCreate, DiskSource, ExternalIpCreate, InstanceCpuCount, + InstanceCreate, InstanceDiskAttachment, InstanceNetworkInterfaceAttachment, + SshKeyCreate, }; +use oxide_client::{ClientDisksExt, ClientInstancesExt, ClientSessionExt}; use russh::{ChannelMsg, Disconnect}; use russh_keys::key::{KeyPair, PublicKey}; use russh_keys::PublicKeyBase64; @@ -38,26 +36,6 @@ async fn instance_launch() -> Result<()> { .send() .await?; - eprintln!("create system image"); - let image_id = ctx - .client - .image_create() - .body(ImageCreate { - name: generate_name("debian")?, - description: String::new(), - os: "debian".try_into().map_err(anyhow::Error::msg)?, - version: "propolis-blob".into(), - source: ImageSource::Url { - url: - 
"http://[fd00:1122:3344:101::1]:54321/debian-11-genericcloud-amd64.raw" - .into(), - block_size: 512.try_into().map_err(anyhow::Error::msg)?, - }, - }) - .send() - .await? - .id; - eprintln!("create disk"); let disk_name = generate_name("disk")?; let disk_name = ctx @@ -67,7 +45,9 @@ async fn instance_launch() -> Result<()> { .body(DiskCreate { name: disk_name.clone(), description: String::new(), - disk_source: DiskSource::Image { image_id }, + disk_source: DiskSource::Image { + image_id: ctx.get_silo_image_id("debian11").await?, + }, size: ByteCount(2048 * 1024 * 1024), }) .send() diff --git a/nexus/db-model/src/image.rs b/nexus/db-model/src/image.rs index 91a9469d30..6cdf3201be 100644 --- a/nexus/db-model/src/image.rs +++ b/nexus/db-model/src/image.rs @@ -202,7 +202,6 @@ impl From for views::Image { Self { identity: image.identity(), project_id: image.project_id, - url: image.url, os: image.os, version: image.version, digest: image.digest.map(|x| x.into()), diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 28d6c4506c..5cfecc9f08 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -369,32 +369,6 @@ impl super::Nexus { Ok(()) } - /// Import blocks from a URL into a disk - pub(crate) async fn import_blocks_from_url_for_disk( - self: &Arc, - opctx: &OpContext, - disk_lookup: &lookup::Disk<'_>, - params: params::ImportBlocksFromUrl, - ) -> UpdateResult<()> { - let authz_disk: authz::Disk; - - (.., authz_disk) = - disk_lookup.lookup_for(authz::Action::Modify).await?; - - let saga_params = sagas::import_blocks_from_url::Params { - serialized_authn: authn::saga::Serialized::for_opctx(opctx), - disk_id: authz_disk.id(), - - import_params: params.clone(), - }; - - self - .execute_saga::(saga_params) - .await?; - - Ok(()) - } - /// Move a disk from the "ImportReady" state to the "Importing" state, /// blocking any import from URL jobs. 
pub(crate) async fn disk_manual_import_start( diff --git a/nexus/src/app/image.rs b/nexus/src/app/image.rs index 8fa9308c1d..5e78b2a096 100644 --- a/nexus/src/app/image.rs +++ b/nexus/src/app/image.rs @@ -23,7 +23,6 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; -use std::str::FromStr; use std::sync::Arc; use uuid::Uuid; @@ -96,121 +95,6 @@ impl super::Nexus { } }; let new_image = match ¶ms.source { - params::ImageSource::Url { url, block_size } => { - let db_block_size = db::model::BlockSize::try_from(*block_size) - .map_err(|e| Error::InvalidValue { - label: String::from("block_size"), - message: format!("block_size is invalid: {}", e), - })?; - - let image_id = Uuid::new_v4(); - - let volume_construction_request = - sled_agent_client::types::VolumeConstructionRequest::Url { - id: image_id, - block_size: db_block_size.to_bytes().into(), - url: url.clone(), - }; - - let volume_data = - serde_json::to_string(&volume_construction_request)?; - - // use reqwest to query url for size - let dur = std::time::Duration::from_secs(5); - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .map_err(|e| { - Error::internal_error(&format!( - "failed to build reqwest client: {}", - e - )) - })?; - - let response = client.head(url).send().await.map_err(|e| { - Error::InvalidValue { - label: String::from("url"), - message: format!("error querying url: {}", e), - } - })?; - - if !response.status().is_success() { - return Err(Error::InvalidValue { - label: String::from("url"), - message: format!( - "querying url returned: {}", - response.status() - ), - }); - } - - // grab total size from content length - let content_length = response - .headers() - .get(reqwest::header::CONTENT_LENGTH) - .ok_or("no content length!") - .map_err(|e| Error::InvalidValue { - label: String::from("url"), - message: format!("error querying url: {}", e), - })?; - - let total_size = - u64::from_str(content_length.to_str().map_err(|e| { - Error::InvalidValue { - label: String::from("url"), - message: format!("content length invalid: {}", e), - } - })?) 
- .map_err(|e| { - Error::InvalidValue { - label: String::from("url"), - message: format!("content length invalid: {}", e), - } - })?; - - let size: external::ByteCount = total_size.try_into().map_err( - |e: external::ByteCountRangeError| Error::InvalidValue { - label: String::from("size"), - message: format!("total size is invalid: {}", e), - }, - )?; - - // validate total size is divisible by block size - let block_size: u64 = (*block_size).into(); - if (size.to_bytes() % block_size) != 0 { - return Err(Error::InvalidValue { - label: String::from("size"), - message: format!( - "total size {} must be divisible by block size {}", - size.to_bytes(), - block_size - ), - }); - } - - let new_image_volume = - db::model::Volume::new(Uuid::new_v4(), volume_data); - let volume = - self.db_datastore.volume_create(new_image_volume).await?; - - db::model::Image { - identity: db::model::ImageIdentity::new( - image_id, - params.identity.clone(), - ), - silo_id: authz_silo.id(), - project_id: maybe_authz_project.clone().map(|p| p.id()), - volume_id: volume.id(), - url: Some(url.clone()), - os: params.os.clone(), - version: params.version.clone(), - digest: None, // not computed for URL type - block_size: db_block_size, - size: size.into(), - } - } - params::ImageSource::Snapshot { id } => { let image_id = Uuid::new_v4(); diff --git a/nexus/src/app/sagas/import_blocks_from_url.rs b/nexus/src/app/sagas/import_blocks_from_url.rs deleted file mode 100644 index ffee40ba72..0000000000 --- a/nexus/src/app/sagas/import_blocks_from_url.rs +++ /dev/null @@ -1,413 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! For disks in state ImportReady, send a request to import blocks from a URL. -//! Note the Pantry they're attached to must have addressability to the URL! - -use super::declare_saga_actions; -use super::ActionRegistry; -use super::NexusActionContext; -use super::NexusSaga; -use super::SagaInitError; -use crate::app::sagas::retry_until_known_result; -use nexus_db_model::Generation; -use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::{authn, authz}; -use nexus_types::external_api::params; -use omicron_common::api::external; -use omicron_common::api::external::Error; -use serde::Deserialize; -use serde::Serialize; -use std::net::SocketAddrV6; -use steno::ActionError; -use uuid::Uuid; - -#[derive(Debug, Deserialize, Serialize)] -pub(crate) struct Params { - pub serialized_authn: authn::saga::Serialized, - pub disk_id: Uuid, - - pub import_params: params::ImportBlocksFromUrl, -} - -declare_saga_actions! 
{ - import_blocks_from_url; - SET_IMPORTING_STATE -> "disk_generation_number" { - + sibfu_get_importing_state - - sibfu_get_importing_state_undo - } - GET_PANTRY_ADDRESS -> "pantry_address" { - + sibfu_get_pantry_address - } - CALL_PANTRY_IMPORT_FROM_URL_FOR_DISK -> "call_pantry_import_from_url_for_disk" { - + sibfu_call_pantry_import_from_url_for_disk - } - WAIT_FOR_IMPORT_FROM_URL -> "wait_for_import_from_url" { - + sibfu_wait_for_import_from_url - } - SET_IMPORT_READY_STATE -> "set_import_ready_state" { - + sibfu_get_import_ready_state - } -} - -#[derive(Debug)] -pub(crate) struct SagaImportBlocksFromUrl; -impl NexusSaga for SagaImportBlocksFromUrl { - const NAME: &'static str = "import-blocks-from-url"; - type Params = Params; - - fn register_actions(registry: &mut ActionRegistry) { - import_blocks_from_url_register_actions(registry); - } - - fn make_saga_dag( - _params: &Self::Params, - mut builder: steno::DagBuilder, - ) -> Result { - builder.append(set_importing_state_action()); - - builder.append(get_pantry_address_action()); - - // Call the Pantry's /import_from_url - builder.append(call_pantry_import_from_url_for_disk_action()); - - // Wait for import_from_url job to complete - builder.append(wait_for_import_from_url_action()); - - // Set ImportReady state - builder.append(set_import_ready_state_action()); - - Ok(builder.build()?) - } -} - -async fn sibfu_get_importing_state( - sagactx: NexusActionContext, -) -> Result { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., authz_disk, db_disk) = - LookupPath::new(&opctx, &osagactx.datastore()) - .disk_id(params.disk_id) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - - match db_disk.state().into() { - external::DiskState::ImportReady => { - info!( - log, - "setting disk {} to state importing_from_url", - db_disk.id(), - ); - - osagactx - .datastore() - .disk_update_runtime( - &opctx, - &authz_disk, - &db_disk.runtime().importing_from_url(), - ) - .await - .map_err(ActionError::action_failed)?; - - // Record the disk's new generation number as this saga node's output. It - // will be important later to *only* transition this disk out of maintenance - // if the generation number matches what *this* saga is doing. 
- let (.., db_disk) = LookupPath::new(&opctx, &osagactx.datastore()) - .disk_id(params.disk_id) - .fetch_for(authz::Action::Read) - .await - .map_err(ActionError::action_failed)?; - - Ok(db_disk.runtime().gen) - } - - _ => Err(ActionError::action_failed(Error::invalid_request(&format!( - "cannot import blocks from a url into disk in state {:?}", - db_disk.state() - )))), - } -} - -async fn sibfu_get_importing_state_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., authz_disk, db_disk) = - LookupPath::new(&opctx, &osagactx.datastore()) - .disk_id(params.disk_id) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - - let expected_disk_generation_number = - sagactx.lookup::("disk_generation_number")?; - - match db_disk.state().into() { - external::DiskState::ImportingFromUrl => { - // A previous execution of *this* saga may hav already transitioned this disk to - // import_ready. Another saga racing with this one may have transitioned the disk to - // importing - only set this disk to import_ready if the generation number matches this - // saga. - if expected_disk_generation_number == db_disk.runtime().gen { - info!( - log, - "undo: setting disk {} state from importing_from_url to import_ready", - params.disk_id - ); - - osagactx - .datastore() - .disk_update_runtime( - &opctx, - &authz_disk, - &db_disk.runtime().import_ready(), - ) - .await - .map_err(ActionError::action_failed)?; - } else { - info!( - log, - "disk {} has generation number {:?}, which doesn't match the expected {:?}: skip setting to import_ready", - params.disk_id, - db_disk.runtime().gen, - expected_disk_generation_number, - ); - } - } - - external::DiskState::ImportReady => { - info!(log, "disk {} already import_ready", params.disk_id); - } - - _ => { - warn!(log, "disk is in state {:?}", db_disk.state()); - } - } - - Ok(()) -} - -async fn sibfu_get_pantry_address( - sagactx: NexusActionContext, -) -> Result { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., db_disk) = LookupPath::new(&opctx, &osagactx.datastore()) - .disk_id(params.disk_id) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - - // At any stage of executing this saga, if the disk moves from state - // importing to detached, it will be detached from the corresponding Pantry. - // Any subsequent saga nodes will fail because the pantry address is stored - // as part of the saga state, and requests sent to that Pantry with the - // disk's id will fail. 
- let pantry_address = db_disk.pantry_address().ok_or_else(|| { - ActionError::action_failed(String::from("disk not attached to pantry!")) - })?; - - info!(log, "disk {} is using pantry at {}", db_disk.id(), pantry_address); - - Ok(pantry_address) -} - -async fn sibfu_call_pantry_import_from_url_for_disk( - sagactx: NexusActionContext, -) -> Result { - let log = sagactx.user_data().log(); - let params = sagactx.saga_params::()?; - - let pantry_address = sagactx.lookup::("pantry_address")?; - let endpoint = format!("http://{}", pantry_address); - - info!( - log, - "sending import from url {} request for disk {} to pantry endpoint {}", - params.import_params.url, - params.disk_id, - endpoint, - ); - - let disk_id = params.disk_id.to_string(); - - let client = crucible_pantry_client::Client::new(&endpoint); - - let request = crucible_pantry_client::types::ImportFromUrlRequest { - url: params.import_params.url, - expected_digest: if let Some(expected_digest) = - params.import_params.expected_digest - { - match expected_digest { - nexus_types::external_api::params::ExpectedDigest::Sha256( - v, - ) => Some( - crucible_pantry_client::types::ExpectedDigest::Sha256(v), - ), - } - } else { - None - }, - }; - - let response = retry_until_known_result(log, || async { - client.import_from_url(&disk_id, &request).await - }) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "import from url failed with {:?}", - e - )) - })?; - - Ok(response.job_id.clone()) -} - -async fn sibfu_wait_for_import_from_url( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let log = sagactx.user_data().log(); - let params = sagactx.saga_params::()?; - - let pantry_address = sagactx.lookup::("pantry_address")?; - let job_id = - sagactx.lookup::("call_pantry_import_from_url_for_disk")?; - - let endpoint = format!("http://{}", pantry_address); - - let client = crucible_pantry_client::Client::new(&endpoint); - - info!( - log, - "waiting for import from url job {} for disk {} to complete on pantry {}", - job_id, - params.disk_id, - endpoint, - ); - - loop { - let result = retry_until_known_result(log, || async { - client.is_job_finished(&job_id).await - }) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "is_job_finished failed with {:?}", - e - )) - })?; - - if result.job_is_finished { - break; - } - - tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; - } - - info!( - log, - "import from url job {} for disk {} on pantry {} completed", - job_id, - params.disk_id, - endpoint, - ); - - let response = retry_until_known_result(log, || async { - client.job_result_ok(&job_id).await - }) - .await - .map_err(|e| { - ActionError::action_failed(format!("job_result_ok failed with {:?}", e)) - })?; - - if !response.job_result_ok { - return Err(ActionError::action_failed(format!("Job {job_id} failed"))); - } - - Ok(()) -} - -async fn sibfu_get_import_ready_state( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., authz_disk, db_disk) = - LookupPath::new(&opctx, &osagactx.datastore()) - .disk_id(params.disk_id) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - - let expected_disk_generation_number = - sagactx.lookup::("disk_generation_number")?; - - match db_disk.state().into() { - 
external::DiskState::ImportingFromUrl => { - if expected_disk_generation_number == db_disk.runtime().gen { - info!( - log, - "setting disk {} state from importing_from_url to import_ready", - params.disk_id - ); - - osagactx - .datastore() - .disk_update_runtime( - &opctx, - &authz_disk, - &db_disk.runtime().import_ready(), - ) - .await - .map_err(ActionError::action_failed)?; - } else { - info!( - log, - "disk {} has generation number {:?}, which doesn't match the expected {:?}: skip setting to import_ready", - params.disk_id, - db_disk.runtime().gen, - expected_disk_generation_number, - ); - } - } - - external::DiskState::ImportReady => { - info!(log, "disk {} already import_ready", params.disk_id); - } - - _ => { - warn!(log, "disk is in state {:?}", db_disk.state()); - } - } - - Ok(()) -} diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 89e1a10052..c5918d32ef 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -23,7 +23,6 @@ pub mod disk_create; pub mod disk_delete; pub mod finalize_disk; pub mod image_delete; -pub mod import_blocks_from_url; mod instance_common; pub mod instance_create; pub mod instance_delete; @@ -125,7 +124,6 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); - ::register_actions(&mut registry); ::register_actions( &mut registry, ); diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a113451fc7..a2e5f633df 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -154,7 +154,6 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(disk_bulk_write_import_start)?; api.register(disk_bulk_write_import)?; api.register(disk_bulk_write_import_stop)?; - api.register(disk_import_blocks_from_url)?; api.register(disk_finalize_import)?; api.register(instance_list)?; @@ -1916,39 +1915,6 @@ async fn disk_bulk_write_import_stop( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Request to import blocks from URL -#[endpoint { - method = POST, - path = "/v1/disks/{disk}/import", - tags = ["disks"], -}] -async fn disk_import_blocks_from_url( - rqctx: RequestContext>, - path_params: Path, - query_params: Query, - import_params: TypedBody, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let params = import_params.into_inner(); - - let disk_selector = - params::DiskSelector { disk: path.disk, project: query.project }; - let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?; - - nexus - .import_blocks_from_url_for_disk(&opctx, &disk_lookup, params) - .await?; - - Ok(HttpResponseUpdatedNoContent()) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - /// Confirm disk block import completion #[endpoint { method = POST, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index db803bfde0..e11902d0fe 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -262,8 +262,6 @@ lazy_static! 
{ ), }; - pub static ref DEMO_IMPORT_DISK_IMPORT_FROM_URL_URL: String = - format!("/v1/disks/{}/import?{}", *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR); pub static ref DEMO_IMPORT_DISK_BULK_WRITE_START_URL: String = format!("/v1/disks/{}/bulk-write-start?{}", *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR); pub static ref DEMO_IMPORT_DISK_BULK_WRITE_URL: String = @@ -493,10 +491,7 @@ lazy_static! { name: DEMO_IMAGE_NAME.clone(), description: String::from(""), }, - source: params::ImageSource::Url { - url: HTTP_SERVER.url("/image.raw").to_string(), - block_size: params::BlockSize::try_from(4096).unwrap(), - }, + source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, os: "fake-os".to_string(), version: "1.0".to_string() }; @@ -1328,20 +1323,6 @@ lazy_static! { ], }, - VerifyEndpoint { - url: &DEMO_IMPORT_DISK_IMPORT_FROM_URL_URL, - visibility: Visibility::Protected, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![ - AllowedMethod::Post( - serde_json::to_value(params::ImportBlocksFromUrl { - url: "obviously-fake-url".into(), - expected_digest: None, - }).unwrap() - ) - ], - }, - VerifyEndpoint { url: &DEMO_IMPORT_DISK_BULK_WRITE_START_URL, visibility: Visibility::Protected, diff --git a/nexus/tests/integration_tests/images.rs b/nexus/tests/integration_tests/images.rs index c3db9e8f13..9d608937ce 100644 --- a/nexus/tests/integration_tests/images.rs +++ b/nexus/tests/integration_tests/images.rs @@ -24,15 +24,11 @@ use nexus_types::identity::Resource; use omicron_common::api::external::Disk; use omicron_common::api::external::{ByteCount, IdentityMetadataCreateParams}; -use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; - type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; const PROJECT_NAME: &str = "myproj"; -const BLOCK_SIZE: params::BlockSize = params::BlockSize(512); - fn get_project_images_url(project_name: &str) -> String { format!("/v1/images?project={}", project_name) } @@ -56,18 +52,6 @@ async fn test_image_create(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let images_url = get_project_images_url(PROJECT_NAME); // Before project exists, image list 404s @@ -94,10 +78,9 @@ async fn test_image_create(cptestctx: &ControlPlaneTestContext) { assert_eq!(images.len(), 0); // Create an image in the project - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); NexusRequest::objects_post(client, &images_url, &image_create_params) .authn_as(AuthnMode::PrivilegedUser) @@ -120,18 +103,6 @@ async fn test_silo_image_create(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) 
- .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let silo_images_url = "/v1/images"; // Expect no images in the silo @@ -144,10 +115,9 @@ async fn test_silo_image_create(cptestctx: &ControlPlaneTestContext) { assert_eq!(images.len(), 0); // Create an image in the project - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); // Create image NexusRequest::objects_post(client, &silo_images_url, &image_create_params) @@ -165,162 +135,6 @@ async fn test_silo_image_create(cptestctx: &ControlPlaneTestContext) { assert_eq!(images[0].identity.name, "alpine-edge"); } -#[nexus_test] -async fn test_image_create_url_404(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - DiskTest::new(&cptestctx).await; - - // need a project to post to - create_project(client, PROJECT_NAME).await; - - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with(status_code(404)), - ); - - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); - - let images_url = get_project_images_url(PROJECT_NAME); - - let error = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &images_url) - .body(Some(&image_create_params)) - .expect_status(Some(StatusCode::BAD_REQUEST)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("unexpected success") - .parsed_body::() - .unwrap(); - assert_eq!( - error.message, - format!("unsupported value for \"url\": querying url returned: 404 Not Found") - ); -} - -#[nexus_test] -async fn test_image_create_bad_url(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - DiskTest::new(&cptestctx).await; - - // need a project to post to - create_project(client, PROJECT_NAME).await; - - let image_create_params = get_image_create(params::ImageSource::Url { - url: "not_a_url".to_string(), - block_size: BLOCK_SIZE, - }); - - let images_url = get_project_images_url(PROJECT_NAME); - - let error = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &images_url) - .body(Some(&image_create_params)) - .expect_status(Some(StatusCode::BAD_REQUEST)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("unexpected success") - .parsed_body::() - .unwrap(); - assert_eq!( - error.message, - format!("unsupported value for \"url\": error querying url: builder error: relative URL without a base") - ); -} - -#[nexus_test] -async fn test_image_create_bad_content_length( - cptestctx: &ControlPlaneTestContext, -) { - let client = &cptestctx.external_client; - DiskTest::new(&cptestctx).await; - - // need a project to post to - create_project(client, PROJECT_NAME).await; - - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) 
- .respond_with( - status_code(200).append_header("Content-Length", "bad"), - ), - ); - - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); - - let images_url = get_project_images_url(PROJECT_NAME); - - let error = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &images_url) - .body(Some(&image_create_params)) - .expect_status(Some(StatusCode::BAD_REQUEST)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("unexpected success") - .parsed_body::() - .unwrap(); - assert_eq!( - error.message, - format!("unsupported value for \"url\": content length invalid: invalid digit found in string") - ); -} - -#[nexus_test] -async fn test_image_create_bad_image_size(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - DiskTest::new(&cptestctx).await; - - // need a project to post to - create_project(client, PROJECT_NAME).await; - - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with(status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000 + 100), - )), - ); - - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); - - let images_url = get_project_images_url(PROJECT_NAME); - - let error = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &images_url) - .body(Some(&image_create_params)) - .expect_status(Some(StatusCode::BAD_REQUEST)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("unexpected success") - .parsed_body::() - .unwrap(); - assert_eq!( - error.message, - format!("unsupported value for \"size\": total size {} must be divisible by block size {}", 4096*1000 + 100, 512) - ); -} - #[nexus_test] async fn test_make_disk_from_image(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; @@ -329,23 +143,10 @@ async fn test_make_disk_from_image(cptestctx: &ControlPlaneTestContext) { // need a project to post both disk and image to create_project(client, PROJECT_NAME).await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/alpine/edge.raw")) - .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - // Create an image in the project - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/alpine/edge.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); let images_url = get_project_images_url(PROJECT_NAME); @@ -384,23 +185,10 @@ async fn test_make_disk_from_other_project_image_fails( create_project(client, PROJECT_NAME).await; let another_project = create_project(client, "another-proj").await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) 
- .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let images_url = get_project_images_url(PROJECT_NAME); - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); let image = NexusRequest::objects_post(client, &images_url, &image_create_params) .authn_as(AuthnMode::PrivilegedUser) @@ -443,20 +231,10 @@ async fn test_make_disk_from_image_too_small( // need a project to post both disk and image to create_project(client, PROJECT_NAME).await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/alpine/edge.raw")) - .times(1..) - .respond_with( - status_code(200).append_header("Content-Length", "2147483648"), - ), - ); - // Create an image in the project - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/alpine/edge.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); let images_url = get_project_images_url(PROJECT_NAME); @@ -474,7 +252,9 @@ async fn test_make_disk_from_image_too_small( disk_source: params::DiskSource::Image { image_id: alpine_image.identity.id, }, - size: ByteCount::from(1073741824), + + // Nexus defines YouCanBootAnythingAsLongAsItsAlpine size as 100M + size: ByteCount::from(90 * 1024 * 1024), }; let disks_url = format!("/v1/disks?project={}", PROJECT_NAME); @@ -493,7 +273,7 @@ async fn test_make_disk_from_image_too_small( error.message, format!( "disk size {} must be greater than or equal to image size {}", - 1073741824_u32, 2147483648_u32, + 94371840_u32, 104857600_u32, ) ); } @@ -503,18 +283,6 @@ async fn test_image_promotion(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let silo_images_url = "/v1/images"; let images_url = get_project_images_url(PROJECT_NAME); @@ -528,10 +296,9 @@ async fn test_image_promotion(cptestctx: &ControlPlaneTestContext) { assert_eq!(images.len(), 0); - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); NexusRequest::objects_post(client, &images_url, &image_create_params) .authn_as(AuthnMode::PrivilegedUser) @@ -631,28 +398,15 @@ async fn test_image_from_other_project_snapshot_fails( let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) 
- .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - create_project(client, PROJECT_NAME).await; let images_url = get_project_images_url(PROJECT_NAME); let disks_url = format!("/v1/disks?project={}", PROJECT_NAME); let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME); // Create an image - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); let image: views::Image = NexusRequest::objects_post(client, &images_url, &image_create_params) .authn_as(AuthnMode::PrivilegedUser) @@ -749,25 +503,12 @@ async fn test_image_deletion_permissions(cptestctx: &ControlPlaneTestContext) { // Create an image in the default silo using the privileged user - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let silo_images_url = "/v1/images"; let images_url = get_project_images_url(PROJECT_NAME); - let image_create_params = get_image_create(params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: BLOCK_SIZE, - }); + let image_create_params = get_image_create( + params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + ); let image = NexusRequest::objects_post(client, &images_url, &image_create_params) diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index f54370c32f..33d4d15d23 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -73,8 +73,6 @@ use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::shared::SiloRole; use omicron_sled_agent::sim; -use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; - type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -1280,18 +1278,6 @@ async fn test_instance_using_image_from_other_project_fails( let client = &cptestctx.external_client; create_org_and_project(&client).await; - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - // Create an image in springfield-squidport. 
    let images_url = format!("/v1/images?project={}", PROJECT_NAME);

    let image_create_params = params::ImageCreate {
@@ -1303,10 +1289,7 @@ async fn test_instance_using_image_from_other_project_fails(
         },
         os: "alpine".to_string(),
         version: "edge".to_string(),
-        source: params::ImageSource::Url {
-            url: server.url("/image.raw").to_string(),
-            block_size: params::BlockSize::try_from(512).unwrap(),
-        },
+        source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine,
     };
     let image =
         NexusRequest::objects_post(client, &images_url, &image_create_params)
diff --git a/nexus/tests/integration_tests/pantry.rs b/nexus/tests/integration_tests/pantry.rs
index 26e27e92ee..dc4e8e6c95 100644
--- a/nexus/tests/integration_tests/pantry.rs
+++ b/nexus/tests/integration_tests/pantry.rs
@@ -302,25 +302,6 @@ async fn bulk_write_stop(
     .unwrap();
 }
 
-async fn import_blocks_from_url(client: &ClientTestContext) {
-    // Import blocks from a URL
-    let import_blocks_from_url_url =
-        format!("/v1/disks/{}/import?project={}", DISK_NAME, PROJECT_NAME,);
-
-    NexusRequest::new(
-        RequestBuilder::new(client, Method::POST, &import_blocks_from_url_url)
-            .body(Some(&params::ImportBlocksFromUrl {
-                url: "http://fake.endpoint/image.iso".to_string(),
-                expected_digest: None,
-            }))
-            .expect_status(Some(StatusCode::NO_CONTENT)),
-    )
-    .authn_as(AuthnMode::PrivilegedUser)
-    .execute()
-    .await
-    .unwrap();
-}
-
 async fn finalize_import(
     client: &ClientTestContext,
     expected_status: StatusCode,
@@ -461,33 +442,6 @@ async fn test_cannot_mount_import_from_bulk_writes_disk(
         .await;
 }
 
-// Test the normal flow of importing from a URL
-#[nexus_test]
-async fn test_import_blocks_from_url(cptestctx: &ControlPlaneTestContext) {
-    let client = &cptestctx.external_client;
-    let nexus = &cptestctx.server.apictx().nexus;
-
-    DiskTest::new(&cptestctx).await;
-    create_org_and_project(client).await;
-
-    create_disk_with_state_importing_blocks(client).await;
-
-    // Import blocks from a URL
-    import_blocks_from_url(client).await;
-
-    // Validate disk is in state ImportReady
-    validate_disk_state(client, DiskState::ImportReady).await;
-
-    // Finalize import
-    finalize_import(client, StatusCode::NO_CONTENT).await;
-
-    // Validate disk is in state Detached
-    validate_disk_state(client, DiskState::Detached).await;
-
-    // Create an instance to attach the disk.
-    create_instance_and_attach_disk(client, nexus, StatusCode::ACCEPTED).await;
-}
-
 // Test the normal flow of importing from bulk writes
 #[nexus_test]
 async fn test_import_blocks_with_bulk_write(
diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs
index 1dd32e6769..a9ed1b7cb7 100644
--- a/nexus/tests/integration_tests/snapshots.rs
+++ b/nexus/tests/integration_tests/snapshots.rs
@@ -35,8 +35,6 @@ use omicron_common::api::external::Name;
 use omicron_nexus::app::MIN_DISK_SIZE_BYTES;
 use uuid::Uuid;
 
-use httptest::{matchers::*, responders::*, Expectation, ServerBuilder};
-
 type ControlPlaneTestContext =
     nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
 
@@ -64,18 +62,6 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) {
     let disks_url = get_disks_url();
 
     // Define a global image
-    let server = ServerBuilder::new().run().unwrap();
-    server.expect(
-        Expectation::matching(request::method_path("HEAD", "/image.raw"))
-            .times(1..)
- .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let image_create_params = params::ImageCreate { identity: IdentityMetadataCreateParams { name: "alpine-edge".parse().unwrap(), @@ -83,10 +69,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { "you can boot any image, as long as it's alpine", ), }, - source: params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: params::BlockSize::try_from(512).unwrap(), - }, + source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, os: "alpine".to_string(), version: "edge".to_string(), }; @@ -184,18 +167,6 @@ async fn test_snapshot_without_instance(cptestctx: &ControlPlaneTestContext) { let disks_url = get_disks_url(); // Define a global image - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let image_create_params = params::ImageCreate { identity: IdentityMetadataCreateParams { name: "alpine-edge".parse().unwrap(), @@ -203,10 +174,7 @@ async fn test_snapshot_without_instance(cptestctx: &ControlPlaneTestContext) { "you can boot any image, as long as it's alpine", ), }, - source: params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: params::BlockSize::try_from(512).unwrap(), - }, + source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, os: "alpine".to_string(), version: "edge".to_string(), }; @@ -842,18 +810,6 @@ async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) { let disks_url = get_disks_url(); // Define a global image - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), - ); - let image_create_params = params::ImageCreate { identity: IdentityMetadataCreateParams { name: "alpine-edge".parse().unwrap(), @@ -861,10 +817,7 @@ async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) { "you can boot any image, as long as it's alpine", ), }, - source: params::ImageSource::Url { - url: server.url("/image.raw").to_string(), - block_size: params::BlockSize::try_from(512).unwrap(), - }, + source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, os: "alpine".to_string(), version: "edge".to_string(), }; diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 24a0e5591b..5454e1f68f 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -30,8 +30,6 @@ use sled_agent_client::types::{CrucibleOpts, VolumeConstructionRequest}; use std::sync::Arc; use uuid::Uuid; -use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; - type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -63,17 +61,6 @@ async fn create_image(client: &ClientTestContext) -> views::Image { create_org_and_project(client).await; // Define a global image - let server = ServerBuilder::new().run().unwrap(); - server.expect( - Expectation::matching(request::method_path("HEAD", "/image.raw")) - .times(1..) 
-            .respond_with(
-                status_code(200).append_header(
-                    "Content-Length",
-                    format!("{}", 4096 * 1000),
-                ),
-            ),
-    );
 
     let image_create_params = params::ImageCreate {
         identity: IdentityMetadataCreateParams {
@@ -82,10 +69,7 @@ async fn create_image(client: &ClientTestContext) -> views::Image {
                 "you can boot any image, as long as it's alpine",
             ),
         },
-        source: params::ImageSource::Url {
-            url: server.url("/image.raw").to_string(),
-            block_size: params::BlockSize::try_from(512).unwrap(),
-        },
+        source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine,
         os: "alpine".to_string(),
         version: "edge".to_string(),
     };
diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt
index b236d73551..5a4a61132e 100644
--- a/nexus/tests/output/nexus_tags.txt
+++ b/nexus/tests/output/nexus_tags.txt
@@ -6,7 +6,6 @@ disk_bulk_write_import_stop              POST     /v1/disks/{disk}/bulk-write-st
 disk_create                              POST     /v1/disks
 disk_delete                              DELETE   /v1/disks/{disk}
 disk_finalize_import                     POST     /v1/disks/{disk}/finalize
-disk_import_blocks_from_url              POST     /v1/disks/{disk}/import
 disk_list                                GET      /v1/disks
 disk_metrics_list                        GET      /v1/disks/{disk}/metrics/{metric}
 disk_view                                GET      /v1/disks/{disk}
diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs
index e582590aa0..cde448c5b7 100644
--- a/nexus/types/src/external_api/params.rs
+++ b/nexus/types/src/external_api/params.rs
@@ -1219,15 +1219,6 @@ pub enum ExpectedDigest {
     Sha256(String),
 }
 
-/// Parameters for importing blocks from a URL to a disk
-#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
-pub struct ImportBlocksFromUrl {
-    /// the source to pull blocks from
-    pub url: String,
-    /// Expected digest of all blocks when importing from a URL
-    pub expected_digest: Option<ExpectedDigest>,
-}
-
 /// Parameters for importing blocks with a bulk write
 // equivalent to crucible_pantry_client::types::BulkWriteRequest
 #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
@@ -1736,12 +1727,6 @@ pub struct SwitchPortApplySettings {
 #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ImageSource {
-    Url {
-        url: String,
-
-        /// The block size in bytes
-        block_size: BlockSize,
-    },
     Snapshot {
         id: Uuid,
     },
diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs
index 047bd71814..af17e7e840 100644
--- a/nexus/types/src/external_api/views.rs
+++ b/nexus/types/src/external_api/views.rs
@@ -133,9 +133,6 @@ pub struct Image {
     /// ID of the parent project if the image is a project image
     pub project_id: Option<Uuid>,
 
-    /// URL source of this image, if any
-    pub url: Option<String>,
-
     /// The family of the operating system like Debian, Ubuntu, etc.
pub os: String, diff --git a/openapi/nexus.json b/openapi/nexus.json index 6076663a2d..7afb6cdc2f 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -699,55 +699,6 @@ } } }, - "/v1/disks/{disk}/import": { - "post": { - "tags": [ - "disks" - ], - "summary": "Request to import blocks from URL", - "operationId": "disk_import_blocks_from_url", - "parameters": [ - { - "in": "path", - "name": "disk", - "description": "Name or ID of the disk", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ImportBlocksFromUrl" - } - } - }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/v1/disks/{disk}/metrics/{metric}": { "get": { "tags": [ @@ -10527,22 +10478,6 @@ "request_id" ] }, - "ExpectedDigest": { - "oneOf": [ - { - "type": "object", - "properties": { - "sha256": { - "type": "string" - } - }, - "required": [ - "sha256" - ], - "additionalProperties": false - } - ] - }, "ExternalIp": { "type": "object", "properties": { @@ -11297,11 +11232,6 @@ "type": "string", "format": "date-time" }, - "url": { - "nullable": true, - "description": "URL source of this image, if any", - "type": "string" - }, "version": { "description": "Version of the operating system", "type": "string" @@ -11378,33 +11308,6 @@ "ImageSource": { "description": "The source of the underlying image.", "oneOf": [ - { - "type": "object", - "properties": { - "block_size": { - "description": "The block size in bytes", - "allOf": [ - { - "$ref": "#/components/schemas/BlockSize" - } - ] - }, - "type": { - "type": "string", - "enum": [ - "url" - ] - }, - "url": { - "type": "string" - } - }, - "required": [ - "block_size", - "type", - "url" - ] - }, { "type": "object", "properties": { @@ -11459,28 +11362,6 @@ "offset" ] }, - "ImportBlocksFromUrl": { - "description": "Parameters for importing blocks from a URL to a disk", - "type": "object", - "properties": { - "expected_digest": { - "nullable": true, - "description": "Expected digest of all blocks when importing from a URL", - "allOf": [ - { - "$ref": "#/components/schemas/ExpectedDigest" - } - ] - }, - "url": { - "description": "the source to pull blocks from", - "type": "string" - } - }, - "required": [ - "url" - ] - }, "Instance": { "description": "View of an Instance", "type": "object", From 76b835d5d68657265564f40a4667e8ffc7026091 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 8 Dec 2023 11:50:54 -0800 Subject: [PATCH 074/186] [sled-agent] add preliminary "write boot disk OS" http endpoints (#4633) This PR adds three new endpoints to `sled-agent`: * `POST /boot-disk/{boot_disk}/os/write` to start a new update to one of the two boot disks * `GET /boot-disk/{boot_disk}/os/write/status` to get the status of the most-recently-started update to the specified boot disk * `DELETE /boot-disk/{boot_disk}/os/write/status/{update_id}` to clear the status of a previous update The actual drive-writing-machinery is extracted from `installinator` into `installinator-common`, which is now a new dependency of `sled-agent`. 
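As a rough sketch of the intended client flow (hedged: the base URL, the use
of `reqwest` with its `json` feature, `serde_json`, and the helper shape below
are all assumptions for illustration; only the endpoint paths, query
parameters, and status JSON shapes come from the OpenAPI changes in this
patch):

    // Hedged sketch, not code from this PR: drive one update to boot disk
    // slot A end-to-end against the three new endpoints.
    use std::time::Duration;
    use uuid::Uuid;

    async fn write_boot_disk_os(
        base: &str,       // e.g. "http://[fd00:1122:3344:101::1]:12345" (made up)
        image: Vec<u8>,
        digest_hex: &str, // hex-encoded SHA3-256 of `image`
    ) -> anyhow::Result<()> {
        let client = reqwest::Client::new();
        let update_id = Uuid::new_v4();

        // POST /boot-disk/{boot_disk}/os/write starts the update; it does
        // not return until the image body has been received and buffered.
        client
            .post(format!("{base}/boot-disk/A/os/write"))
            .query(&[
                ("update_id", update_id.to_string()),
                ("sha3_256_digest", digest_hex.to_string()),
            ])
            .body(image)
            .send()
            .await?
            .error_for_status()?;

        // GET /boot-disk/{boot_disk}/os/write/status reports progress;
        // poll until the update reaches a terminal status.
        loop {
            let status: serde_json::Value = client
                .get(format!("{base}/boot-disk/A/os/write/status"))
                .send()
                .await?
                .error_for_status()?
                .json()
                .await?;
            match status["status"].as_str() {
                Some("complete") => break,
                Some("failed") => anyhow::bail!("update failed: {status}"),
                _ => tokio::time::sleep(Duration::from_secs(1)).await,
            }
        }

        // DELETE /boot-disk/{boot_disk}/os/write/status/{update_id} clears
        // the terminal status so a later update can be started.
        client
            .delete(format!("{base}/boot-disk/A/os/write/status/{update_id}"))
            .send()
            .await?
            .error_for_status()?;

        Ok(())
    }

(The write request returning only after the upload completes is why the status
poll begins afterward; see `BootDiskOsWriter::start_update` below.)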
The bulk of the changes in this PR center around being able to run that drive-write from within the context of the pair of dropshot endpoints above, plus a bit of glue for unit testing with "in-memory disks". --- Cargo.lock | 10 + common/src/lib.rs | 2 + common/src/update.rs | 4 +- installinator-common/Cargo.toml | 6 + .../src/block_size_writer.rs | 44 +- installinator-common/src/lib.rs | 4 + installinator-common/src/raw_disk_writer.rs | 123 ++ installinator/src/lib.rs | 1 - installinator/src/write.rs | 67 +- openapi/sled-agent.json | 290 +++ sled-agent/Cargo.toml | 6 + sled-agent/src/boot_disk_os_writer.rs | 1669 +++++++++++++++++ sled-agent/src/config.rs | 6 + sled-agent/src/http_entrypoints.rs | 172 +- sled-agent/src/lib.rs | 1 + sled-agent/src/server.rs | 7 +- sled-agent/src/sled_agent.rs | 13 + smf/sled-agent/gimlet-standalone/config.toml | 5 + smf/sled-agent/gimlet/config.toml | 5 + smf/sled-agent/non-gimlet/config.toml | 5 + 20 files changed, 2374 insertions(+), 66 deletions(-) rename {installinator => installinator-common}/src/block_size_writer.rs (81%) create mode 100644 installinator-common/src/raw_disk_writer.rs create mode 100644 sled-agent/src/boot_disk_os_writer.rs diff --git a/Cargo.lock b/Cargo.lock index ed988f4b14..71cca52057 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3314,12 +3314,16 @@ dependencies = [ "anyhow", "camino", "illumos-utils", + "libc", "omicron-workspace-hack", + "proptest", "schemars", "serde", "serde_json", "serde_with", + "test-strategy", "thiserror", + "tokio", "update-engine", ] @@ -4867,6 +4871,7 @@ dependencies = [ "crucible-agent-client", "ddm-admin-client", "derive_more", + "display-error-chain", "dns-server", "dns-service-client", "dpd-client", @@ -4876,10 +4881,12 @@ dependencies = [ "futures", "gateway-client", "glob", + "hex", "http", "hyper", "hyper-staticfile", "illumos-utils", + "installinator-common", "internal-dns", "ipnetwork", "itertools 0.12.0", @@ -4907,6 +4914,7 @@ dependencies = [ "schemars", "semver 1.0.20", "serde", + "serde_human_bytes", "serde_json", "serial_test", "sha3", @@ -4925,6 +4933,8 @@ dependencies = [ "thiserror", "tofino", "tokio", + "tokio-stream", + "tokio-util", "toml 0.8.8", "usdt", "uuid", diff --git a/common/src/lib.rs b/common/src/lib.rs index 1d2ed0afdb..0d63de90fb 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -31,6 +31,8 @@ pub mod postgres_config; pub mod update; pub mod vlan; +pub use update::hex_schema; + #[macro_export] macro_rules! generate_logging_api { ($path:literal) => { diff --git a/common/src/update.rs b/common/src/update.rs index 81256eb526..28d5ae50a6 100644 --- a/common/src/update.rs +++ b/common/src/update.rs @@ -296,7 +296,9 @@ impl FromStr for ArtifactHash { } } -fn hex_schema(gen: &mut SchemaGenerator) -> Schema { +/// Produce an OpenAPI schema describing a hex array of a specific length (e.g., +/// a hash digest). 
+pub fn hex_schema<const N: usize>(gen: &mut SchemaGenerator) -> Schema {
     let mut schema: SchemaObject = <String>::json_schema(gen).into();
     schema.format = Some(format!("hex string ({N} bytes)"));
     schema.into()
diff --git a/installinator-common/Cargo.toml b/installinator-common/Cargo.toml
index 4381de74eb..dd8540c6f8 100644
--- a/installinator-common/Cargo.toml
+++ b/installinator-common/Cargo.toml
@@ -8,10 +8,16 @@ license = "MPL-2.0"
 anyhow.workspace = true
 camino.workspace = true
 illumos-utils.workspace = true
+libc.workspace = true
 schemars.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
 thiserror.workspace = true
+tokio.workspace = true
 update-engine.workspace = true
 omicron-workspace-hack.workspace = true
+
+[dev-dependencies]
+proptest.workspace = true
+test-strategy.workspace = true
diff --git a/installinator/src/block_size_writer.rs b/installinator-common/src/block_size_writer.rs
similarity index 81%
rename from installinator/src/block_size_writer.rs
rename to installinator-common/src/block_size_writer.rs
index 3f41a4ee99..1548594b41 100644
--- a/installinator/src/block_size_writer.rs
+++ b/installinator-common/src/block_size_writer.rs
@@ -11,31 +11,37 @@ use tokio::io::AsyncWrite;
 
 /// `BlockSizeBufWriter` is analogous to tokio's `BufWriter`, except it
 /// guarantees that writes made to the underlying writer are always
-/// _exactly_ the requested block size, with two exceptions: explicitly
-/// calling (1) `flush()` or (2) `shutdown()` will write any
-/// buffered-but-not-yet-written data to the underlying buffer regardless of
-/// its length.
+/// _exactly_ the requested block size, with three exceptions:
+///
+/// 1. Calling `flush()` will write any currently-buffered data to the
+///    underlying writer, regardless of its length.
+/// 2. Similarly, calling `shutdown()` will flush any currently-buffered data
+///    to the underlying writer.
+/// 3. When `BlockSizeBufWriter` attempts to write a block-length amount of data
+///    to the underlying writer, if that writer only accepts a portion of that
+///    data, `BlockSizeBufWriter` will continue attempting to write the
+///    remainder of the block.
 ///
 /// When `BlockSizeBufWriter` is dropped, any buffered data it's holding
 /// will be discarded. It is critical to manually call
 /// `BlockSizeBufWriter::flush()` or `BlockSizeBufWriter::shutdown()` prior
 /// to dropping to avoid data loss.
-pub(crate) struct BlockSizeBufWriter<W> {
+pub struct BlockSizeBufWriter<W> {
     inner: W,
     buf: Vec<u8>,
     block_size: usize,
 }
 
 impl<W: AsyncWrite + Unpin> BlockSizeBufWriter<W> {
-    pub(crate) fn with_block_size(block_size: usize, inner: W) -> Self {
+    pub fn with_block_size(block_size: usize, inner: W) -> Self {
         Self { inner, buf: Vec::with_capacity(block_size), block_size }
     }
 
-    pub(crate) fn into_inner(self) -> W {
+    pub fn into_inner(self) -> W {
         self.inner
     }
 
-    pub(crate) fn block_size(&self) -> usize {
+    pub fn block_size(&self) -> usize {
         self.block_size
     }
 
@@ -46,6 +52,13 @@ impl<W: AsyncWrite + Unpin> BlockSizeBufWriter<W> {
     fn flush_buf(&mut self, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
         let mut written = 0;
         let mut ret = Ok(());
+
+        // We expect this loop to execute exactly one time: we try to write the
+        // entirety of `self.buf` to `self.inner`, and presumably it is a type
+        // that expects to receive a block of data at once, so we'll immediately
+        // jump to `written == self.buf.len()`. If it returns `Ok(n)` for some
+        // `n < self.buf.len()`, we'll loop and try to write the rest of the
+        // data in less-than-block-sized chunks.
while written < self.buf.len() { match ready!( Pin::new(&mut self.inner).poll_write(cx, &self.buf[written..]) @@ -128,8 +141,8 @@ impl AsyncWrite for BlockSizeBufWriter { #[cfg(test)] mod tests { use super::*; - use crate::test_helpers::with_test_runtime; use anyhow::Result; + use std::future::Future; use test_strategy::proptest; use tokio::io::AsyncWriteExt; @@ -167,6 +180,19 @@ mod tests { } } + fn with_test_runtime(f: F) -> T + where + F: FnOnce() -> Fut, + Fut: Future, + { + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_time() + .start_paused(true) + .build() + .expect("tokio Runtime built successfully"); + runtime.block_on(f()) + } + #[proptest] fn proptest_block_writer( chunks: Vec>, diff --git a/installinator-common/src/lib.rs b/installinator-common/src/lib.rs index b77385840f..4771de7b27 100644 --- a/installinator-common/src/lib.rs +++ b/installinator-common/src/lib.rs @@ -4,6 +4,10 @@ //! Common types shared by the installinator client and server. +mod block_size_writer; mod progress; +mod raw_disk_writer; +pub use block_size_writer::*; pub use progress::*; +pub use raw_disk_writer::*; diff --git a/installinator-common/src/raw_disk_writer.rs b/installinator-common/src/raw_disk_writer.rs new file mode 100644 index 0000000000..35d3862e67 --- /dev/null +++ b/installinator-common/src/raw_disk_writer.rs @@ -0,0 +1,123 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Async writer for raw disks on illumos (e.g., host OS phase 2 images written +//! to M.2 drives). + +use crate::BlockSizeBufWriter; +use illumos_utils::dkio; +use illumos_utils::dkio::MediaInfoExtended; +use std::io; +use std::os::fd::AsRawFd; +use std::path::Path; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use tokio::fs::File; +use tokio::io::AsyncWrite; +use tokio::io::AsyncWriteExt; + +/// Writer for illumos raw disks. +/// +/// Construct an instance via [`RawDiskWriter::open()`], write to it just like +/// any other async writer (it will handle passing writes down to the device in +/// chunks of length [`RawDiskWriter::block_size()`]), and then call +/// [`RawDiskWriter::finalize()`]. It is **critical** to call `finalize()`; +/// failure to do so will likely lead to data loss. +/// +/// `RawDiskWriter` attempts to be as conservative as it can about ensuring data +/// is written: +/// +/// * The device is opened with `O_SYNC` +/// * In `finalize()`, the file is `fsync`'d after any remaining data is flushed +/// * In `finalize()`, the disk write cache is flushed (if supported by the +/// target device) +/// +/// Writing an amount of data that is not a multiple of the device's +/// `block_size()` will likely result in a failure when writing / flushing the +/// final not-correctly-sized chunk. +/// +/// This type is illumos-specific due to using dkio for two things: +/// +/// 1. Determining the logical block size of the device +/// 2. Flushing the disk write cache +pub struct RawDiskWriter { + inner: BlockSizeBufWriter, +} + +impl RawDiskWriter { + /// Open the disk device at `path` for writing, and attempt to determine its + /// logical block size via [`MediaInfoExtended`]. 
+ pub async fn open(path: &Path) -> io::Result { + let f = tokio::fs::OpenOptions::new() + .create(false) + .write(true) + .truncate(false) + .custom_flags(libc::O_SYNC) + .open(path) + .await?; + + let media_info = MediaInfoExtended::from_fd(f.as_raw_fd())?; + + let inner = BlockSizeBufWriter::with_block_size( + media_info.logical_block_size as usize, + f, + ); + + Ok(Self { inner }) + } + + /// The logical block size of the underlying device. + pub fn block_size(&self) -> usize { + self.inner.block_size() + } + + /// Flush any remaining data and attempt to ensure synchronization with the + /// device. + pub async fn finalize(mut self) -> io::Result<()> { + // Flush any remaining data in our buffer + self.inner.flush().await?; + + // `fsync` the file... + let f = self.inner.into_inner(); + f.sync_all().await?; + + // ...and also attempt to flush the disk write cache + tokio::task::spawn_blocking(move || { + match dkio::flush_write_cache(f.as_raw_fd()) { + Ok(()) => Ok(()), + // Some drives don't support `flush_write_cache`; we don't want + // to fail in this case. + Err(err) if err.raw_os_error() == Some(libc::ENOTSUP) => Ok(()), + Err(err) => Err(err), + } + }) + .await + .expect("task panicked") + } +} + +impl AsyncWrite for RawDiskWriter { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + Pin::new(&mut self.inner).poll_write(cx, buf) + } + + fn poll_flush( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + Pin::new(&mut self.inner).poll_flush(cx) + } + + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + Pin::new(&mut self.inner).poll_shutdown(cx) + } +} diff --git a/installinator/src/lib.rs b/installinator/src/lib.rs index c7de189576..3b1d768a7d 100644 --- a/installinator/src/lib.rs +++ b/installinator/src/lib.rs @@ -4,7 +4,6 @@ mod artifact; mod async_temp_file; -mod block_size_writer; mod bootstrap; mod dispatch; mod errors; diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 22dd2adbf6..380595b4cd 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -6,7 +6,6 @@ use std::{ collections::{btree_map::Entry, BTreeMap, BTreeSet}, fmt, io::{self, Read}, - os::fd::AsRawFd, time::Duration, }; @@ -15,14 +14,11 @@ use async_trait::async_trait; use buf_list::BufList; use bytes::Buf; use camino::{Utf8Path, Utf8PathBuf}; -use illumos_utils::{ - dkio::{self, MediaInfoExtended}, - zpool::{Zpool, ZpoolName}, -}; +use illumos_utils::zpool::{Zpool, ZpoolName}; use installinator_common::{ - ControlPlaneZonesSpec, ControlPlaneZonesStepId, M2Slot, StepContext, - StepProgress, StepResult, StepSuccess, UpdateEngine, WriteComponent, - WriteError, WriteOutput, WriteSpec, WriteStepId, + ControlPlaneZonesSpec, ControlPlaneZonesStepId, M2Slot, RawDiskWriter, + StepContext, StepProgress, StepResult, StepSuccess, UpdateEngine, + WriteComponent, WriteError, WriteOutput, WriteSpec, WriteStepId, }; use omicron_common::update::{ArtifactHash, ArtifactHashId}; use sha2::{Digest, Sha256}; @@ -36,10 +32,7 @@ use update_engine::{ errors::NestedEngineError, events::ProgressUnits, StepSpec, }; -use crate::{ - async_temp_file::AsyncNamedTempFile, block_size_writer::BlockSizeBufWriter, - hardware::Hardware, -}; +use crate::{async_temp_file::AsyncNamedTempFile, hardware::Hardware}; #[derive(Clone, Debug)] struct ArtifactDestination { @@ -754,28 +747,13 @@ impl WriteTransportWriter for AsyncNamedTempFile { } #[async_trait] -impl WriteTransportWriter for BlockSizeBufWriter { 
+impl WriteTransportWriter for RawDiskWriter { fn block_size(&self) -> Option { - Some(BlockSizeBufWriter::block_size(self)) + Some(RawDiskWriter::block_size(self)) } async fn finalize(self) -> io::Result<()> { - let f = self.into_inner(); - f.sync_all().await?; - - // We only create `BlockSizeBufWriter` for the raw block device storing - // the OS ramdisk. After `fsync`'ing, also flush the write cache. - tokio::task::spawn_blocking(move || { - match dkio::flush_write_cache(f.as_raw_fd()) { - Ok(()) => Ok(()), - // Some drives don't support `flush_write_cache`; we don't want - // to fail in this case. - Err(err) if err.raw_os_error() == Some(libc::ENOTSUP) => Ok(()), - Err(err) => Err(err), - } - }) - .await - .unwrap() + RawDiskWriter::finalize(self).await } } @@ -810,7 +788,7 @@ struct BlockDeviceTransport; #[async_trait] impl WriteTransport for BlockDeviceTransport { - type W = BlockSizeBufWriter; + type W = RawDiskWriter; async fn make_writer( &mut self, @@ -819,12 +797,7 @@ impl WriteTransport for BlockDeviceTransport { destination: &Utf8Path, total_bytes: u64, ) -> Result { - let f = tokio::fs::OpenOptions::new() - .create(false) - .write(true) - .truncate(false) - .custom_flags(libc::O_SYNC) - .open(destination) + let writer = RawDiskWriter::open(destination.as_std_path()) .await .map_err(|error| WriteError::WriteError { component, @@ -834,18 +807,7 @@ impl WriteTransport for BlockDeviceTransport { error, })?; - let media_info = - MediaInfoExtended::from_fd(f.as_raw_fd()).map_err(|error| { - WriteError::WriteError { - component, - slot, - written_bytes: 0, - total_bytes, - error, - } - })?; - - let block_size = u64::from(media_info.logical_block_size); + let block_size = writer.block_size() as u64; // When writing to a block device, we must write a multiple of the block // size. 
We can assume the image we're given should be @@ -858,12 +820,15 @@ impl WriteTransport for BlockDeviceTransport { total_bytes, error: io::Error::new( io::ErrorKind::InvalidData, - format!("file size ({total_bytes}) is not a multiple of target device block size ({block_size})") + format!( + "file size ({total_bytes}) is not a multiple of \ + target device block size ({block_size})" + ), ), }); } - Ok(BlockSizeBufWriter::with_block_size(block_size as usize, f)) + Ok(writer) } } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 3a88b6cc9c..f809cfa57b 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -10,6 +10,132 @@ "version": "0.0.1" }, "paths": { + "/boot-disk/{boot_disk}/os/write": { + "post": { + "summary": "Write a new host OS image to the specified boot disk", + "operationId": "host_os_write_start", + "parameters": [ + { + "in": "path", + "name": "boot_disk", + "required": true, + "schema": { + "$ref": "#/components/schemas/M2Slot" + } + }, + { + "in": "query", + "name": "sha3_256_digest", + "required": true, + "schema": { + "type": "string", + "format": "hex string (32 bytes)" + } + }, + { + "in": "query", + "name": "update_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/boot-disk/{boot_disk}/os/write/status": { + "get": { + "summary": "Get the status of writing a new host OS", + "operationId": "host_os_write_status_get", + "parameters": [ + { + "in": "path", + "name": "boot_disk", + "required": true, + "schema": { + "$ref": "#/components/schemas/M2Slot" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BootDiskOsWriteStatus" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/boot-disk/{boot_disk}/os/write/status/{update_id}": { + "delete": { + "summary": "Clear the status of a completed write of a new host OS", + "operationId": "host_os_write_status_delete", + "parameters": [ + { + "in": "path", + "name": "boot_disk", + "required": true, + "schema": { + "$ref": "#/components/schemas/M2Slot" + } + }, + { + "in": "path", + "name": "update_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/cockroachdb": { "post": { "summary": "Initializes a CockroachDB cluster", @@ -2135,6 +2261,162 @@ "range" ] }, + "BootDiskOsWriteProgress": { + "description": "Current progress of an OS image being written to disk.", + "oneOf": [ + { + "description": "The image is still being uploaded.", + "type": "object", + "properties": { + "bytes_received": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "state": { + "type": "string", + "enum": [ + "receiving_uploaded_image" + ] + } + }, + "required": [ + "bytes_received", + "state" + ] + }, + { + "description": "The image is being written to disk.", + "type": "object", + 
"properties": { + "bytes_written": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "state": { + "type": "string", + "enum": [ + "writing_image_to_disk" + ] + } + }, + "required": [ + "bytes_written", + "state" + ] + }, + { + "description": "The image is being read back from disk for validation.", + "type": "object", + "properties": { + "bytes_read": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "state": { + "type": "string", + "enum": [ + "validating_written_image" + ] + } + }, + "required": [ + "bytes_read", + "state" + ] + } + ] + }, + "BootDiskOsWriteStatus": { + "description": "Status of an update to a boot disk OS.", + "oneOf": [ + { + "description": "No update has been started for this disk, or any previously-started update has completed and had its status cleared.", + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "no_update_started" + ] + } + }, + "required": [ + "status" + ] + }, + { + "description": "An update is currently running.", + "type": "object", + "properties": { + "progress": { + "$ref": "#/components/schemas/BootDiskOsWriteProgress" + }, + "status": { + "type": "string", + "enum": [ + "in_progress" + ] + }, + "update_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "progress", + "status", + "update_id" + ] + }, + { + "description": "The most recent update completed successfully.", + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "complete" + ] + }, + "update_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "status", + "update_id" + ] + }, + { + "description": "The most recent update failed.", + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "status": { + "type": "string", + "enum": [ + "failed" + ] + }, + "update_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "message", + "status", + "update_id" + ] + } + ] + }, "BundleUtilization": { "description": "The portion of a debug dataset used for zone bundles.", "type": "object", @@ -6485,6 +6767,14 @@ "description": "Zpool names are of the format ox{i,p}_. 
They are either Internal or External, and should be unique", "type": "string", "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + }, + "M2Slot": { + "description": "An M.2 slot that was written.", + "type": "string", + "enum": [ + "A", + "B" + ] } }, "responses": { diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 61e61709e1..7607d57b95 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -25,14 +25,17 @@ derive_more.workspace = true dns-server.workspace = true dns-service-client.workspace = true dpd-client.workspace = true +display-error-chain.workspace = true dropshot.workspace = true flate2.workspace = true futures.workspace = true glob.workspace = true +hex.workspace = true http.workspace = true hyper-staticfile.workspace = true gateway-client.workspace = true illumos-utils.workspace = true +installinator-common.workspace = true internal-dns.workspace = true ipnetwork.workspace = true itertools.workspace = true @@ -53,6 +56,7 @@ reqwest = { workspace = true, features = ["rustls-tls", "stream"] } schemars = { workspace = true, features = [ "chrono", "uuid1" ] } semver.workspace = true serde.workspace = true +serde_human_bytes.workspace = true serde_json = {workspace = true, features = ["raw_value"]} sha3.workspace = true sled-agent-client.workspace = true @@ -93,6 +97,8 @@ subprocess.workspace = true slog-async.workspace = true slog-term.workspace = true tempfile.workspace = true +tokio-stream.workspace = true +tokio-util.workspace = true illumos-utils = { workspace = true, features = ["testing", "tmp_keypath"] } sled-storage = { workspace = true, features = ["testing"] } diff --git a/sled-agent/src/boot_disk_os_writer.rs b/sled-agent/src/boot_disk_os_writer.rs new file mode 100644 index 0000000000..a0798ed174 --- /dev/null +++ b/sled-agent/src/boot_disk_os_writer.rs @@ -0,0 +1,1669 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! This module provides `BootDiskOsWriter`, via which sled-agent can write new +//! OS images to its boot disks. 
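+//
+// An illustrative sketch (not code from this module) of how the three HTTP
+// endpoints above map onto this type, assuming a handle to the sled-agent's
+// `BootDiskOsWriter`:
+//
+//     // POST /boot-disk/{boot_disk}/os/write
+//     writer.start_update(boot_disk, devfs_path, update_id, digest, body).await?;
+//     // GET /boot-disk/{boot_disk}/os/write/status
+//     let status = writer.status(boot_disk);
+//     // DELETE /boot-disk/{boot_disk}/os/write/status/{update_id}
+//     writer.clear_terminal_status(boot_disk, update_id)?;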
+ +use crate::http_entrypoints::BootDiskOsWriteProgress; +use crate::http_entrypoints::BootDiskOsWriteStatus; +use async_trait::async_trait; +use bytes::Bytes; +use camino::Utf8PathBuf; +use display_error_chain::DisplayErrorChain; +use dropshot::HttpError; +use futures::Stream; +use futures::TryStreamExt; +use installinator_common::M2Slot; +use installinator_common::RawDiskWriter; +use sha3::Digest; +use sha3::Sha3_256; +use slog::Logger; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::io; +use std::io::Read; +use std::path::Path; +use std::sync::Arc; +use std::sync::Mutex; +use tokio::fs::File; +use tokio::io::AsyncReadExt; +use tokio::io::AsyncSeekExt; +use tokio::io::AsyncWrite; +use tokio::io::AsyncWriteExt; +use tokio::io::BufReader; +use tokio::sync::oneshot; +use tokio::sync::oneshot::error::TryRecvError; +use tokio::sync::watch; +use uuid::Uuid; + +impl BootDiskOsWriteStatus { + fn from_result( + update_id: Uuid, + result: &Result<(), Arc>, + ) -> Self { + match result { + Ok(()) => Self::Complete { update_id }, + Err(err) => Self::Failed { + update_id, + message: DisplayErrorChain::new(err).to_string(), + }, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub(crate) enum BootDiskOsWriteError { + // This variant should be impossible in production, as we build with + // panic=abort, but may be constructed in tests (e.g., during tokio runtime + // shutdown). + #[error("internal error (task panic)")] + TaskPanic, + #[error("an update is still running ({0})")] + UpdateRunning(Uuid), + #[error("a previous update completed ({0}); clear its status before starting a new update")] + CannotStartWithoutClearingPreviousStatus(Uuid), + #[error("failed to create temporary file")] + FailedCreatingTempfile(#[source] io::Error), + #[error("failed writing to temporary file")] + FailedWritingTempfile(#[source] io::Error), + #[error("failed downloading image from HTTP client")] + FailedDownloadingImage(#[source] HttpError), + #[error("hash mismatch in image from HTTP client: expected {expected} but got {got}")] + UploadedImageHashMismatch { expected: String, got: String }, + #[error("failed to open disk for writing {path}")] + FailedOpenDiskForWrite { + #[source] + error: io::Error, + path: Utf8PathBuf, + }, + #[error("image size ({image_size}) is not a multiple of disk block size ({disk_block_size})")] + ImageSizeNotMultipleOfBlockSize { + image_size: usize, + disk_block_size: usize, + }, + #[error("failed reading from temporary file")] + FailedReadingTempfile(#[source] io::Error), + #[error("failed writing to disk {path}")] + FailedWritingDisk { + #[source] + error: io::Error, + path: Utf8PathBuf, + }, + #[error("failed to open disk for reading {path}")] + FailedOpenDiskForRead { + #[source] + error: io::Error, + path: Utf8PathBuf, + }, + #[error("failed reading from disk {path}")] + FailedReadingDisk { + #[source] + error: io::Error, + path: Utf8PathBuf, + }, + #[error("hash mismatch after writing disk {path}: expected {expected} but got {got}")] + WrittenImageHashMismatch { + path: Utf8PathBuf, + expected: String, + got: String, + }, + #[error("unexpected update ID {0}: cannot clear status")] + WrongUpdateIdClearingStatus(Uuid), +} + +impl From<&BootDiskOsWriteError> for HttpError { + fn from(error: &BootDiskOsWriteError) -> Self { + let message = DisplayErrorChain::new(error).to_string(); + match error { + BootDiskOsWriteError::UpdateRunning(_) + | BootDiskOsWriteError::CannotStartWithoutClearingPreviousStatus( + _, + ) + | 
BootDiskOsWriteError::FailedDownloadingImage(_) + | BootDiskOsWriteError::UploadedImageHashMismatch { .. } + | BootDiskOsWriteError::ImageSizeNotMultipleOfBlockSize { + .. + } + | BootDiskOsWriteError::WrongUpdateIdClearingStatus(_) => { + HttpError::for_bad_request(None, message) + } + BootDiskOsWriteError::TaskPanic + | BootDiskOsWriteError::FailedCreatingTempfile(_) + | BootDiskOsWriteError::FailedWritingTempfile(_) + | BootDiskOsWriteError::FailedReadingTempfile(_) + | BootDiskOsWriteError::FailedOpenDiskForWrite { .. } + | BootDiskOsWriteError::FailedOpenDiskForRead { .. } + | BootDiskOsWriteError::FailedWritingDisk { .. } + | BootDiskOsWriteError::FailedReadingDisk { .. } + | BootDiskOsWriteError::WrittenImageHashMismatch { .. } => { + HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: None, + external_message: message.clone(), + internal_message: message, + } + } + } + } +} + +// Note to future maintainers: `installinator` uses the `update_engine` crate to +// drive its process (which includes writing the boot disk). We could also use +// `update_engine` inside `BootDiskOsWriter`; instead, we've hand-rolled a state +// machine with manual progress reporting. The current implementation is +// _probably_ simple enough that this was a reasonable choice, but if it becomes +// more complex (or if additional work needs to be done that `update_engine` +// would make easier), consider switching it over. +#[derive(Debug)] +pub(crate) struct BootDiskOsWriter { + // Note: We use a std Mutex here to avoid cancellation issues with tokio + // Mutex. We never need to keep the lock held longer than necessary to copy + // or replace the current writer state. + states: Mutex>, + log: Logger, +} + +impl BootDiskOsWriter { + pub(crate) fn new(log: &Logger) -> Self { + Self { + states: Mutex::default(), + log: log.new(slog::o!("component" => "BootDiskOsWriter")), + } + } + + /// Attempt to start a new update to the given disk (identified by both its + /// slot and the path to its devfs device). + /// + /// This method will return after the `image_upload` stream has been saved + /// to a local temporary file, but before the update has completed. Callers + /// must poll `status()` to discover when the running update completes (or + /// fails). + /// + /// # Errors + /// + /// This method will return an error and not start an update if any of the + /// following are true: + /// + /// * A previously-started update of this same `boot_disk` is still running + /// * A previously-completed update has not had its status cleared + /// * The `image_upload` stream returns an error + /// * The hash of the data provided by `image_upload` does not match + /// `sha3_256_digest` + /// * Any of a variety of I/O errors occurs while copying from + /// `image_upload` to a temporary file + /// + /// In all but the first case, the error returned will also be saved and + /// returned when `status()` is called (until another update is started). 
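+    ///
+    /// Illustrative call site (a hedged sketch, not the actual dropshot
+    /// handler; note the `From<&BootDiskOsWriteError> for HttpError` impl
+    /// above for surfacing failures to clients):
+    ///
+    /// ```ignore
+    /// writer
+    ///     .start_update(boot_disk, devfs_path, update_id, digest, body)
+    ///     .await
+    ///     .map_err(|err| HttpError::from(&*err))?;
+    /// ```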
+    pub(crate) async fn start_update<S>(
+        &self,
+        boot_disk: M2Slot,
+        disk_devfs_path: Utf8PathBuf,
+        update_id: Uuid,
+        sha3_256_digest: [u8; 32],
+        image_upload: S,
+    ) -> Result<(), Arc<BootDiskOsWriteError>>
+    where
+        S: Stream<Item = Result<Bytes, HttpError>> + Send + 'static,
+    {
+        self.start_update_impl(
+            boot_disk,
+            disk_devfs_path,
+            update_id,
+            sha3_256_digest,
+            image_upload,
+            RealDiskInterface {},
+        )
+        .await
+    }
+
+    async fn start_update_impl<S, Writer>(
+        &self,
+        boot_disk: M2Slot,
+        disk_devfs_path: Utf8PathBuf,
+        update_id: Uuid,
+        sha3_256_digest: [u8; 32],
+        image_upload: S,
+        disk_writer: Writer,
+    ) -> Result<(), Arc<BootDiskOsWriteError>>
+    where
+        S: Stream<Item = Result<Bytes, HttpError>> + Send + 'static,
+        Writer: DiskInterface + Send + Sync + 'static,
+    {
+        // Construct a closure that will spawn a task to drive this update, but
+        // don't actually start it yet: we only allow an update to start if
+        // there's not currently an update running targeting the same slot, so
+        // we'll spawn this after checking that below.
+        let spawn_update_task = || {
+            let (uploaded_image_tx, uploaded_image_rx) = oneshot::channel();
+            let (progress_tx, progress_rx) = watch::channel(
+                BootDiskOsWriteProgress::ReceivingUploadedImage {
+                    bytes_received: 0,
+                },
+            );
+            let (complete_tx, complete_rx) = oneshot::channel();
+            let task = BootDiskOsWriteTask {
+                log: self
+                    .log
+                    .new(slog::o!("update_id" => update_id.to_string())),
+                disk_devfs_path,
+                sha3_256_digest,
+                progress_tx,
+                disk_interface: disk_writer,
+            };
+            tokio::spawn(task.run(
+                image_upload,
+                uploaded_image_tx,
+                complete_tx,
+            ));
+            (
+                uploaded_image_rx,
+                TaskRunningState { update_id, progress_rx, complete_rx },
+            )
+        };
+
+        // Either call `spawn_update_task` and get back the handle to
+        // `uploaded_image_rx`, or return an error (if another update for this
+        // boot disk is still running).
+        let uploaded_image_rx = {
+            let mut states = self.states.lock().unwrap();
+            match states.entry(boot_disk) {
+                Entry::Vacant(slot) => {
+                    let (uploaded_image_rx, running) = spawn_update_task();
+                    slot.insert(WriterState::TaskRunning(running));
+                    uploaded_image_rx
+                }
+                Entry::Occupied(mut slot) => match slot.get_mut() {
+                    WriterState::TaskRunning(running) => {
+                        // It's possible this task is actually complete and a
+                        // result is sitting in the `running.complete_rx`
+                        // oneshot, but for the purposes of starting a new
+                        // update it doesn't matter either way: we'll refuse to
+                        // start. Return the "another update running" error; the
+                        // caller will have to check the `status()`, which will
+                        // trigger a "see if it's actually done after all"
+                        // check.
+                        return Err(Arc::new(
+                            BootDiskOsWriteError::UpdateRunning(
+                                running.update_id,
+                            ),
+                        ));
+                    }
+                    WriterState::Complete(complete) => {
+                        return Err(Arc::new(
+                            BootDiskOsWriteError::CannotStartWithoutClearingPreviousStatus(
+                                complete.update_id,
+                            )));
+                    }
+                },
+            }
+        };
+
+        // We've now spawned a task to drive the update, and we want to wait for
+        // it to finish copying from `image_upload`.
+        uploaded_image_rx.await.map_err(|_| BootDiskOsWriteError::TaskPanic)?
+    }
+
+    /// Clear the status of a finished or failed update with the given ID
+    /// targeting `boot_disk`.
+    ///
+    /// If no update has ever been started for this `boot_disk`, returns
+    /// `Ok(())`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if an update to `boot_disk` is currently running; only terminal
+    /// statuses can be cleared. Fails if the most recent terminal status
+    /// targeting `boot_disk` had a different update ID.
+ pub(crate) fn clear_terminal_status( + &self, + boot_disk: M2Slot, + update_id: Uuid, + ) -> Result<(), BootDiskOsWriteError> { + let mut states = self.states.lock().unwrap(); + let mut slot = match states.entry(boot_disk) { + // No status; nothing to clear. + Entry::Vacant(_slot) => return Ok(()), + Entry::Occupied(slot) => slot, + }; + + match slot.get_mut() { + WriterState::Complete(complete) => { + if complete.update_id == update_id { + slot.remove(); + Ok(()) + } else { + Err(BootDiskOsWriteError::WrongUpdateIdClearingStatus( + complete.update_id, + )) + } + } + WriterState::TaskRunning(running) => { + // Check whether the task is _actually_ still running, + // or whether it's done and just waiting for us to + // realize it. + match running.complete_rx.try_recv() { + Ok(result) => { + if running.update_id == update_id { + // This is a little surprising but legal: we've been + // asked to clear the terminal status of this + // update_id, even though we just realized it + // finished. + slot.remove(); + Ok(()) + } else { + let running_update_id = running.update_id; + // A different update just finished; store the + // result we got from the oneshot and don't remove + // the status. + slot.insert(WriterState::Complete( + TaskCompleteState { + update_id: running_update_id, + result, + }, + )); + Err(BootDiskOsWriteError::WrongUpdateIdClearingStatus( + running_update_id + )) + } + } + Err(TryRecvError::Empty) => Err( + BootDiskOsWriteError::UpdateRunning(running.update_id), + ), + Err(TryRecvError::Closed) => { + Err(BootDiskOsWriteError::TaskPanic) + } + } + } + } + } + + /// Get the status of any update running that targets `boot_disk`. + pub(crate) fn status(&self, boot_disk: M2Slot) -> BootDiskOsWriteStatus { + let mut states = self.states.lock().unwrap(); + let mut slot = match states.entry(boot_disk) { + Entry::Vacant(_) => return BootDiskOsWriteStatus::NoUpdateStarted, + Entry::Occupied(slot) => slot, + }; + + match slot.get_mut() { + WriterState::TaskRunning(running) => { + // Is the task actually still running? Check and see if it's + // sent us a result that we just haven't noticed yet. + match running.complete_rx.try_recv() { + Ok(result) => { + let update_id = running.update_id; + let status = BootDiskOsWriteStatus::from_result( + update_id, &result, + ); + slot.insert(WriterState::Complete(TaskCompleteState { + update_id, + result, + })); + status + } + Err(TryRecvError::Empty) => { + let progress = *running.progress_rx.borrow_and_update(); + BootDiskOsWriteStatus::InProgress { + update_id: running.update_id, + progress, + } + } + Err(TryRecvError::Closed) => { + let update_id = running.update_id; + let result = + Err(Arc::new(BootDiskOsWriteError::TaskPanic)); + let status = BootDiskOsWriteStatus::from_result( + update_id, &result, + ); + slot.insert(WriterState::Complete(TaskCompleteState { + update_id, + result, + })); + status + } + } + } + WriterState::Complete(complete) => { + BootDiskOsWriteStatus::from_result( + complete.update_id, + &complete.result, + ) + } + } + } +} + +#[derive(Debug)] +enum WriterState { + /// A task is running to write a new image to a boot disk. + TaskRunning(TaskRunningState), + /// The result of the most recent write. 
+    Complete(TaskCompleteState),
+}
+
+#[derive(Debug)]
+struct TaskRunningState {
+    update_id: Uuid,
+    progress_rx: watch::Receiver<BootDiskOsWriteProgress>,
+    complete_rx: oneshot::Receiver<Result<(), Arc<BootDiskOsWriteError>>>,
+}
+
+#[derive(Debug)]
+struct TaskCompleteState {
+    update_id: Uuid,
+    result: Result<(), Arc<BootDiskOsWriteError>>,
+}
+
+#[derive(Debug)]
+struct BootDiskOsWriteTask<W> {
+    log: Logger,
+    sha3_256_digest: [u8; 32],
+    disk_devfs_path: Utf8PathBuf,
+    progress_tx: watch::Sender<BootDiskOsWriteProgress>,
+    disk_interface: W,
+}
+
+impl<W: DiskInterface> BootDiskOsWriteTask<W> {
+    async fn run<S>(
+        self,
+        image_upload: S,
+        uploaded_image_tx: oneshot::Sender<
+            Result<(), Arc<BootDiskOsWriteError>>,
+        >,
+        complete_tx: oneshot::Sender<Result<(), Arc<BootDiskOsWriteError>>>,
+    ) where
+        S: Stream<Item = Result<Bytes, HttpError>> + Send + 'static,
+    {
+        let result = self.run_impl(image_upload, uploaded_image_tx).await;
+
+        // It's possible (albeit unlikely) our caller has discarded the receive
+        // half of this channel; ignore any send error.
+        _ = complete_tx.send(result);
+    }
+
+    async fn run_impl<S>(
+        self,
+        image_upload: S,
+        uploaded_image_tx: oneshot::Sender<
+            Result<(), Arc<BootDiskOsWriteError>>,
+        >,
+    ) -> Result<(), Arc<BootDiskOsWriteError>>
+    where
+        S: Stream<Item = Result<Bytes, HttpError>> + Send + 'static,
+    {
+        // Copy from `image_upload` into a tempfile, then report the result on
+        // `uploaded_image_tx`. Our dropshot server will not respond to the
+        // client that requested this update until we finish this step and send
+        // a response on `uploaded_image_tx`, as `image_upload` is the
+        // `StreamingBody` attached to their request.
+        //
+        // If this step fails, we will send the error to the client who sent the
+        // request _and_ store a copy of the same error in our current update
+        // state.
+        let (image_tempfile, image_size) = match self
+            .download_body_to_tempfile(image_upload)
+            .await
+            .map_err(Arc::new)
+        {
+            Ok(tempfile) => {
+                _ = uploaded_image_tx.send(Ok(()));
+                tempfile
+            }
+            Err(err) => {
+                _ = uploaded_image_tx.send(Err(Arc::clone(&err)));
+                return Err(err);
+            }
+        };
+
+        let disk_block_size = self
+            .copy_tempfile_to_disk(image_tempfile, image_size)
+            .await
+            .map_err(Arc::new)?;
+
+        self.validate_written_image(image_size, disk_block_size)
+            .await
+            .map_err(Arc::new)?;
+
+        Ok(())
+    }
+
+    async fn download_body_to_tempfile<S>(
+        &self,
+        image_upload: S,
+    ) -> Result<(File, usize), BootDiskOsWriteError>
+    where
+        S: Stream<Item = Result<Bytes, HttpError>> + Send + 'static,
+    {
+        let tempfile = camino_tempfile::tempfile()
+            .map_err(BootDiskOsWriteError::FailedCreatingTempfile)?;
+
+        let mut tempfile =
+            tokio::io::BufWriter::new(tokio::fs::File::from_std(tempfile));
+
+        let mut image_upload = std::pin::pin!(image_upload);
+        let mut hasher = Sha3_256::default();
+        let mut bytes_received = 0;
+
+        // Stream the uploaded image into our tempfile.
+        while let Some(bytes) = image_upload
+            .try_next()
+            .await
+            .map_err(BootDiskOsWriteError::FailedDownloadingImage)?
+        {
+            hasher.update(&bytes);
+            tempfile
+                .write_all(&bytes)
+                .await
+                .map_err(BootDiskOsWriteError::FailedWritingTempfile)?;
+            bytes_received += bytes.len();
+            self.progress_tx.send_modify(|progress| {
+                *progress = BootDiskOsWriteProgress::ReceivingUploadedImage {
+                    bytes_received,
+                }
+            });
+        }
+
+        // Flush any remaining buffered data.
+        tempfile
+            .flush()
+            .await
+            .map_err(BootDiskOsWriteError::FailedWritingTempfile)?;
+
+        // Rewind the tempfile.
+        let mut tempfile = tempfile.into_inner();
+        tempfile
+            .rewind()
+            .await
+            .map_err(BootDiskOsWriteError::FailedWritingTempfile)?;
+
+        // Ensure the data the client sent us matches the hash they also sent
+        // us. A failure here means either the client lied or something has gone
+        // horribly wrong.
+        let hash = hasher.finalize();
+        let expected_hash_str = hex::encode(&self.sha3_256_digest);
+        if hash == self.sha3_256_digest.into() {
+            info!(
+                self.log, "received uploaded image";
+                "bytes_received" => bytes_received,
+                "hash" => expected_hash_str,
+            );
+
+            Ok((tempfile, bytes_received))
+        } else {
+            let computed_hash_str = hex::encode(&hash);
+            error!(
+                self.log, "received uploaded image: incorrect hash";
+                "bytes_received" => bytes_received,
+                "computed_hash" => &computed_hash_str,
+                "expected_hash" => &expected_hash_str,
+            );
+
+            Err(BootDiskOsWriteError::UploadedImageHashMismatch {
+                expected: expected_hash_str,
+                got: computed_hash_str,
+            })
+        }
+    }
+
+    /// Copy from `image_tempfile` to the disk device at `self.disk_devfs_path`.
+    /// Returns the block size of that disk.
+    async fn copy_tempfile_to_disk(
+        &self,
+        image_tempfile: File,
+        image_size: usize,
+    ) -> Result<usize, BootDiskOsWriteError> {
+        let mut disk_writer = self
+            .disk_interface
+            .open_writer(self.disk_devfs_path.as_std_path())
+            .await
+            .map_err(|error| BootDiskOsWriteError::FailedOpenDiskForWrite {
+                error,
+                path: self.disk_devfs_path.clone(),
+            })?;
+
+        let disk_block_size = disk_writer.block_size();
+
+        if image_size % disk_block_size != 0 {
+            return Err(
+                BootDiskOsWriteError::ImageSizeNotMultipleOfBlockSize {
+                    image_size,
+                    disk_block_size,
+                },
+            );
+        }
+        let num_blocks = image_size / disk_block_size;
+
+        let mut buf = vec![0; disk_block_size];
+        let mut image_tempfile = BufReader::new(image_tempfile);
+
+        for block in 0..num_blocks {
+            image_tempfile
+                .read_exact(&mut buf)
+                .await
+                .map_err(BootDiskOsWriteError::FailedReadingTempfile)?;
+
+            disk_writer.write_all(&buf).await.map_err(|error| {
+                BootDiskOsWriteError::FailedWritingDisk {
+                    error,
+                    path: self.disk_devfs_path.clone(),
+                }
+            })?;
+
+            self.progress_tx.send_modify(|progress| {
+                *progress = BootDiskOsWriteProgress::WritingImageToDisk {
+                    bytes_written: (block + 1) * buf.len(),
+                }
+            });
+        }
+
+        disk_writer.finalize().await.map_err(|error| {
+            BootDiskOsWriteError::FailedWritingDisk {
+                error,
+                path: self.disk_devfs_path.clone(),
+            }
+        })?;
+
+        info!(
+            self.log, "copied OS image to disk";
+            "path" => %self.disk_devfs_path,
+            "bytes_written" => image_size,
+        );
+
+        Ok(disk_block_size)
+    }
+
+    async fn validate_written_image(
+        self,
+        image_size: usize,
+        disk_block_size: usize,
+    ) -> Result<(), BootDiskOsWriteError> {
+        // We're reading the OS image back from disk and hashing it; this can
+        // all be synchronous inside a spawn_blocking.
+        tokio::task::spawn_blocking(move || {
+            let mut f = self
+                .disk_interface
+                .open_reader(self.disk_devfs_path.as_std_path())
+                .map_err(|error| {
+                    BootDiskOsWriteError::FailedOpenDiskForRead {
+                        error,
+                        path: self.disk_devfs_path.clone(),
+                    }
+                })?;
+
+            let mut buf = vec![0; disk_block_size];
+            let mut hasher = Sha3_256::default();
+            let mut bytes_read = 0;
+
+            while bytes_read < image_size {
+                // We already confirmed while writing the image that the image
+                // size is an exact multiple of the disk block size, so we can
+                // always read a full `buf` here.
+                f.read_exact(&mut buf).map_err(|error| {
+                    BootDiskOsWriteError::FailedReadingDisk {
+                        error,
+                        path: self.disk_devfs_path.clone(),
+                    }
+                })?;
+
+                hasher.update(&buf);
+                bytes_read += buf.len();
+                self.progress_tx.send_modify(|progress| {
+                    *progress =
+                        BootDiskOsWriteProgress::ValidatingWrittenImage {
+                            bytes_read,
+                        };
+                });
+            }
+
+            let expected_hash_str = hex::encode(&self.sha3_256_digest);
+            let hash = hasher.finalize();
+            if hash == self.sha3_256_digest.into() {
+                info!(
+                    self.log, "validated OS image written to disk";
+                    "path" => %self.disk_devfs_path,
+                    "hash" => expected_hash_str,
+                );
+                Ok(())
+            } else {
+                let computed_hash_str = hex::encode(&hash);
+                error!(
+                    self.log, "failed to validate written OS image";
+                    "bytes_hashed" => image_size,
+                    "computed_hash" => &computed_hash_str,
+                    "expected_hash" => &expected_hash_str,
+                );
+                Err(BootDiskOsWriteError::WrittenImageHashMismatch {
+                    path: self.disk_devfs_path,
+                    expected: expected_hash_str,
+                    got: computed_hash_str,
+                })
+            }
+        })
+        .await
+        .expect("blocking task panicked")
+    }
+}
+
+// Utility traits to allow injecting an in-memory "disk" for unit tests.
+#[async_trait]
+trait DiskWriter: AsyncWrite + Send + Sized + Unpin {
+    fn block_size(&self) -> usize;
+    async fn finalize(self) -> io::Result<()>;
+}
+#[async_trait]
+trait DiskInterface: Send + Sync + 'static {
+    type Writer: DiskWriter;
+    type Reader: io::Read + Send;
+    async fn open_writer(&self, path: &Path) -> io::Result<Self::Writer>;
+    fn open_reader(&self, path: &Path) -> io::Result<Self::Reader>;
+}
+
+#[async_trait]
+impl DiskWriter for RawDiskWriter {
+    fn block_size(&self) -> usize {
+        RawDiskWriter::block_size(self)
+    }
+
+    async fn finalize(self) -> io::Result<()> {
+        RawDiskWriter::finalize(self).await
+    }
+}
+
+struct RealDiskInterface {}
+
+#[async_trait]
+impl DiskInterface for RealDiskInterface {
+    type Writer = RawDiskWriter;
+    type Reader = std::fs::File;
+
+    async fn open_writer(&self, path: &Path) -> io::Result<Self::Writer> {
+        RawDiskWriter::open(path).await
+    }
+
+    fn open_reader(&self, path: &Path) -> io::Result<Self::Reader> {
+        std::fs::File::open(path)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use futures::future;
+    use futures::stream;
+    use installinator_common::BlockSizeBufWriter;
+    use omicron_test_utils::dev::test_setup_log;
+    use rand::RngCore;
+    use std::mem;
+    use std::pin::Pin;
+    use std::task::ready;
+    use std::task::Context;
+    use std::task::Poll;
+    use std::time::Duration;
+    use tokio::sync::mpsc;
+    use tokio::sync::Semaphore;
+    use tokio_stream::wrappers::UnboundedReceiverStream;
+    use tokio_util::sync::PollSemaphore;
+
+    // Most of the tests below end up looping while calling
+    // `BootDiskOsWriter::status()` waiting for a specific status message to
+    // arrive. If we get that wrong (or the code under test is wrong!), that
+    // could end up looping forever, so we run all the relevant bits of the
+    // tests under a tokio timeout. We expect all the tests to complete very
+    // quickly in general (< 1 second), so we'll pick something
+    // outrageously-long-enough that if we hit it, we're almost certainly
+    // dealing with a hung test.
+    const TEST_TIMEOUT: Duration = Duration::from_secs(30);
+
+    #[derive(Debug, Clone, PartialEq, Eq)]
+    struct InMemoryDiskContents {
+        path: Utf8PathBuf,
+        data: Vec<u8>,
+    }
+
+    #[derive(Debug, Clone)]
+    struct InMemoryDiskInterface {
+        semaphore: Arc<Semaphore>,
+        finalized_writes: Arc<Mutex<Vec<InMemoryDiskContents>>>,
+    }
+
+    impl InMemoryDiskInterface {
+        const BLOCK_SIZE: usize = 16;
+
+        fn new(semaphore: Semaphore) -> Self {
+            Self {
+                semaphore: Arc::new(semaphore),
+                finalized_writes: Arc::default(),
+            }
+        }
+    }
+
+    #[async_trait]
+    impl DiskInterface for InMemoryDiskInterface {
+        type Writer = InMemoryDiskWriter;
+        type Reader = io::Cursor<Vec<u8>>;
+
+        async fn open_writer(&self, path: &Path) -> io::Result<Self::Writer> {
+            Ok(InMemoryDiskWriter {
+                opened_path: path
+                    .to_owned()
+                    .try_into()
+                    .expect("non-utf8 test path"),
+                data: BlockSizeBufWriter::with_block_size(
+                    Self::BLOCK_SIZE,
+                    Vec::new(),
+                ),
+                semaphore: PollSemaphore::new(Arc::clone(&self.semaphore)),
+                finalized_writes: Arc::clone(&self.finalized_writes),
+            })
+        }
+
+        fn open_reader(&self, path: &Path) -> io::Result<Self::Reader> {
+            let written_files = self.finalized_writes.lock().unwrap();
+            for contents in written_files.iter() {
+                if contents.path == path {
+                    return Ok(io::Cursor::new(contents.data.clone()));
+                }
+            }
+            Err(io::Error::new(
+                io::ErrorKind::Other,
+                format!("no written file for {}", path.display()),
+            ))
+        }
+    }
+
+    struct InMemoryDiskWriter {
+        opened_path: Utf8PathBuf,
+        data: BlockSizeBufWriter<Vec<u8>>,
+        semaphore: PollSemaphore,
+        finalized_writes: Arc<Mutex<Vec<InMemoryDiskContents>>>,
+    }
+
+    #[async_trait]
+    impl DiskWriter for InMemoryDiskWriter {
+        fn block_size(&self) -> usize {
+            self.data.block_size()
+        }
+
+        async fn finalize(mut self) -> io::Result<()> {
+            self.data.flush().await?;
+
+            let mut finalized = self.finalized_writes.lock().unwrap();
+            finalized.push(InMemoryDiskContents {
+                path: self.opened_path,
+                data: self.data.into_inner(),
+            });
+
+            Ok(())
+        }
+    }
+
+    impl AsyncWrite for InMemoryDiskWriter {
+        fn poll_write(
+            mut self: Pin<&mut Self>,
+            cx: &mut Context<'_>,
+            buf: &[u8],
+        ) -> Poll<io::Result<usize>> {
+            let permit = match ready!(self.semaphore.poll_acquire(cx)) {
+                Some(permit) => permit,
+                None => panic!("test semaphore closed"),
+            };
+            let result = Pin::new(&mut self.data).poll_write(cx, buf);
+            permit.forget();
+            result
+        }
+
+        fn poll_flush(
+            mut self: Pin<&mut Self>,
+            cx: &mut Context<'_>,
+        ) -> Poll<io::Result<()>> {
+            Pin::new(&mut self.data).poll_flush(cx)
+        }
+
+        fn poll_shutdown(
+            mut self: Pin<&mut Self>,
+            cx: &mut Context<'_>,
+        ) -> Poll<io::Result<()>> {
+            Pin::new(&mut self.data).poll_shutdown(cx)
+        }
+    }
+
+    fn expect_in_progress(
+        status: BootDiskOsWriteStatus,
+    ) -> BootDiskOsWriteProgress {
+        let BootDiskOsWriteStatus::InProgress { progress, .. } = status else {
+            panic!("expected Status::InProgress; got {status:?}");
+        };
+        progress
+    }
+
+    #[tokio::test]
+    async fn boot_disk_os_writer_delivers_upload_progress_and_rejects_bad_hashes(
+    ) {
+        let logctx =
+            test_setup_log("boot_disk_os_writer_delivers_upload_progress_and_rejects_bad_hashes");
+
+        let writer = Arc::new(BootDiskOsWriter::new(&logctx.log));
+        let boot_disk = M2Slot::A;
+
+        // We'll give the writer an intentionally-wrong sha3 digest and confirm
+        // it rejects the upload based on this.
+        let claimed_sha3_digest = [0; 32];
+
+        // Construct an in-memory stream around an mpsc channel as our client
+        // upload.
+        let (upload_tx, upload_rx) = mpsc::unbounded_channel();
+
+        // Spawn the `start_update` onto a background task; this won't end until
+        // we close (or send an error on) `upload_tx`.
+ let start_update_task = { + let writer = Arc::clone(&writer); + tokio::spawn(async move { + writer + .start_update( + boot_disk, + "/does-not-matter".into(), + Uuid::new_v4(), + claimed_sha3_digest, + UnboundedReceiverStream::new(upload_rx), + ) + .await + }) + }; + + // As we stream data in, we'll compute the actual hash to check against + // the error we expect to see. + let mut actual_data_hasher = Sha3_256::new(); + + // Run the rest of the test under a timeout to catch any incorrect + // assumptions that result in a hang. + tokio::time::timeout(TEST_TIMEOUT, async move { + // We're racing `writer`'s spawning of the actual update task; spin + // until we transition from "no update" to "receiving uploaded + // image". + loop { + match writer.status(boot_disk) { + BootDiskOsWriteStatus::NoUpdateStarted => { + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + BootDiskOsWriteStatus::InProgress { progress, .. } => { + assert_eq!( + progress, + BootDiskOsWriteProgress::ReceivingUploadedImage { + bytes_received: 0 + } + ); + break; + } + status @ (BootDiskOsWriteStatus::Complete { .. } + | BootDiskOsWriteStatus::Failed { .. }) => { + panic!("unexpected status {status:?}") + } + } + } + + let mut prev_bytes_received = 0; + + // Send a few chunks of data. After each, we're racing with `writer` + // which has to copy that data to a temp file before the status will + // change, so loop until we see what we expect. Our TEST_TIMEOUT + // ensures we don't stay here forever if something goes wrong. + for i in 1..=10 { + let data_len = i * 100; + let chunk = vec![0; data_len]; + actual_data_hasher.update(&chunk); + upload_tx.send(Ok(Bytes::from(chunk))).unwrap(); + + loop { + let progress = expect_in_progress(writer.status(boot_disk)); + + // If we lost the race, the status is still what it was + // previously; sleep briefly and check again. + if progress + == (BootDiskOsWriteProgress::ReceivingUploadedImage { + bytes_received: prev_bytes_received, + }) + { + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + + // It's not the old status; it should be exactly the new + // status. If it is, update our count and break out of this + // inner loop. + assert_eq!( + progress, + BootDiskOsWriteProgress::ReceivingUploadedImage { + bytes_received: prev_bytes_received + data_len + } + ); + prev_bytes_received += data_len; + println!("chunk {i}: got {progress:?}"); + break; + } + } + + // Close the channel; `writer` should recognize the upload is + // complete, then realize there's a hash mismatch and fail the + // request. + mem::drop(upload_tx); + + // We expect to see an upload hash mismatch error with these hex + // strings. + let expected_hash = hex::encode(claimed_sha3_digest); + let got_hash = hex::encode(actual_data_hasher.finalize()); + + let start_update_result = start_update_task.await.unwrap(); + let error = start_update_result.unwrap_err(); + match &*error { + BootDiskOsWriteError::UploadedImageHashMismatch { + expected, + got, + } => { + assert_eq!(*got, got_hash); + assert_eq!(*expected, expected_hash); + } + _ => panic!("unexpected error {error:?}"), + } + + // The same error should be present in the current update status. + let expected_error = + BootDiskOsWriteError::UploadedImageHashMismatch { + expected: expected_hash.clone(), + got: got_hash.clone(), + }; + let status = writer.status(boot_disk); + match status { + BootDiskOsWriteStatus::Failed { message, .. 
} => { + assert_eq!( + message, + DisplayErrorChain::new(&expected_error).to_string() + ); + } + BootDiskOsWriteStatus::NoUpdateStarted + | BootDiskOsWriteStatus::InProgress { .. } + | BootDiskOsWriteStatus::Complete { .. } => { + panic!("unexpected status {status:?}") + } + } + }) + .await + .unwrap(); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn boot_disk_os_writer_writes_data_to_disk() { + let logctx = test_setup_log("boot_disk_os_writer_writes_data_to_disk"); + + // generate a small, random "OS image" consisting of 10 "blocks" + let num_data_blocks = 10; + let data_len = num_data_blocks * InMemoryDiskInterface::BLOCK_SIZE; + let mut data = vec![0; data_len]; + rand::thread_rng().fill_bytes(&mut data); + let data_hash = Sha3_256::digest(&data); + + // generate a disk writer with a 0-permit semaphore; we'll inject + // permits in the main loop below to force single-stepping through + // writing the data + let inject_disk_interface = + InMemoryDiskInterface::new(Semaphore::new(0)); + let shared_semaphore = Arc::clone(&inject_disk_interface.semaphore); + + let writer = Arc::new(BootDiskOsWriter::new(&logctx.log)); + let boot_disk = M2Slot::A; + let disk_devfs_path = "/unit-test/disk"; + + writer + .start_update_impl( + boot_disk, + disk_devfs_path.into(), + Uuid::new_v4(), + data_hash.into(), + stream::once(future::ready(Ok(Bytes::from(data.clone())))), + inject_disk_interface, + ) + .await + .unwrap(); + + // Run the rest of the test under a timeout to catch any incorrect + // assumptions that result in a hang. + tokio::time::timeout(TEST_TIMEOUT, async move { + // Wait until `writer` has copied our data into a temp file + loop { + let progress = expect_in_progress(writer.status(boot_disk)); + match progress { + BootDiskOsWriteProgress::ReceivingUploadedImage { + bytes_received, + } => { + if bytes_received == data.len() { + break; + } else { + println!( + "got status with {} bytes received", + bytes_received + ); + } + } + _ => panic!("unexpected progress {progress:?}"), + } + } + + for i in 0..num_data_blocks { + // Add one permit to our shared semaphore, allowing one block of + // data to be written to the "disk". + shared_semaphore.add_permits(1); + + // Did we just release the write of the final block? If so, + // break; we'll wait for completion below. + if i + 1 == num_data_blocks { + break; + } + + // Wait until we see the status we expect for a not-yet-last + // block (i.e., that the disk is still being written). + loop { + let progress = expect_in_progress(writer.status(boot_disk)); + match progress { + BootDiskOsWriteProgress::WritingImageToDisk { + bytes_written, + } if (i + 1) * InMemoryDiskInterface::BLOCK_SIZE + == bytes_written => + { + println!("saw expected progress for block {i}"); + break; + } + _ => { + // This is not an error: we could still be in + // `ReceivingUploadedImage` or the previous + // block's `WritingImageToDisk` + println!("saw irrelevant progress {progress:?}"); + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + } + } + } + + // The last block is being or has been written, and after that the + // writer will reread it to validate the hash. We won't bother + // repeating the same machinery to check each step of that process; + // we'll just wait for the eventual successful completion. + loop { + let status = writer.status(boot_disk); + match status { + BootDiskOsWriteStatus::Complete { .. } => break, + BootDiskOsWriteStatus::InProgress { .. 
} => { + println!("saw irrelevant progress {status:?}"); + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + BootDiskOsWriteStatus::NoUpdateStarted + | BootDiskOsWriteStatus::Failed { .. } => { + panic!("unexpected status {status:?}") + } + } + } + }) + .await + .unwrap(); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn boot_disk_os_writer_fails_if_reading_from_disk_doesnt_match() { + let logctx = test_setup_log( + "boot_disk_os_writer_fails_if_reading_from_disk_doesnt_match", + ); + + // generate a small, random "OS image" consisting of 10 "blocks" + let num_data_blocks = 10; + let data_len = num_data_blocks * InMemoryDiskInterface::BLOCK_SIZE; + let mut data = vec![0; data_len]; + rand::thread_rng().fill_bytes(&mut data); + let original_data_hash = Sha3_256::digest(&data); + + // generate a disk writer with (effectively) unlimited semaphore + // permits, since we don't need to throttle the "disk writing" + let inject_disk_interface = + InMemoryDiskInterface::new(Semaphore::new(Semaphore::MAX_PERMITS)); + + let writer = Arc::new(BootDiskOsWriter::new(&logctx.log)); + let boot_disk = M2Slot::A; + let disk_devfs_path = "/unit-test/disk"; + + // copy the data and corrupt it, then stage this in + // `inject_disk_interface` so that it returns this corrupted data when + // "reading" the disk + let mut bad_data = data.clone(); + bad_data[0] ^= 1; // bit flip + let bad_data_hash = Sha3_256::digest(&bad_data); + inject_disk_interface.finalized_writes.lock().unwrap().push( + InMemoryDiskContents { + path: disk_devfs_path.into(), + data: bad_data, + }, + ); + + writer + .start_update_impl( + boot_disk, + disk_devfs_path.into(), + Uuid::new_v4(), + original_data_hash.into(), + stream::once(future::ready(Ok(Bytes::from(data.clone())))), + inject_disk_interface, + ) + .await + .unwrap(); + + // We expect the update to eventually fail; wait for it to do so. + let failure_message = tokio::time::timeout(TEST_TIMEOUT, async move { + loop { + let status = writer.status(boot_disk); + match status { + BootDiskOsWriteStatus::Failed { message, .. } => { + return message; + } + BootDiskOsWriteStatus::InProgress { .. } => { + println!("saw irrelevant status {status:?}"); + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + BootDiskOsWriteStatus::Complete { .. } + | BootDiskOsWriteStatus::NoUpdateStarted => { + panic!("unexpected status {status:?}"); + } + } + } + }) + .await + .unwrap(); + + // Confirm that the update fails for the reason we expect: when + // re-reading what had been written to disk, it got our corrupt data + // (which hashes to `bad_data_hash`) instead of the expected + // `original_data_hash`. 
+ let expected_error = BootDiskOsWriteError::WrittenImageHashMismatch { + path: disk_devfs_path.into(), + expected: hex::encode(&original_data_hash), + got: hex::encode(&bad_data_hash), + }; + + assert_eq!( + failure_message, + DisplayErrorChain::new(&expected_error).to_string() + ); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn boot_disk_os_writer_can_update_both_slots_simultaneously() { + let logctx = test_setup_log( + "boot_disk_os_writer_can_update_both_slots_simultaneously", + ); + + // generate two small, random "OS image"s consisting of 10 "blocks" each + let num_data_blocks = 10; + let data_len = num_data_blocks * InMemoryDiskInterface::BLOCK_SIZE; + let mut data_a = vec![0; data_len]; + let mut data_b = vec![0; data_len]; + rand::thread_rng().fill_bytes(&mut data_a); + rand::thread_rng().fill_bytes(&mut data_b); + let data_hash_a = Sha3_256::digest(&data_a); + let data_hash_b = Sha3_256::digest(&data_b); + + // generate a disk writer with no semaphore permits so the updates block + // until we get a chance to start both of them + let inject_disk_interface = + InMemoryDiskInterface::new(Semaphore::new(0)); + let shared_semaphore = Arc::clone(&inject_disk_interface.semaphore); + + let writer = Arc::new(BootDiskOsWriter::new(&logctx.log)); + let disk_devfs_path_a = "/unit-test/disk/a"; + let disk_devfs_path_b = "/unit-test/disk/b"; + + let update_id_a = Uuid::new_v4(); + let update_id_b = Uuid::new_v4(); + + writer + .start_update_impl( + M2Slot::A, + disk_devfs_path_a.into(), + update_id_a, + data_hash_a.into(), + stream::once(future::ready(Ok(Bytes::from(data_a.clone())))), + inject_disk_interface.clone(), + ) + .await + .unwrap(); + + writer + .start_update_impl( + M2Slot::B, + disk_devfs_path_b.into(), + update_id_b, + data_hash_b.into(), + stream::once(future::ready(Ok(Bytes::from(data_b.clone())))), + inject_disk_interface.clone(), + ) + .await + .unwrap(); + + // Both updates have successfully started; unblock the "disks". + shared_semaphore.add_permits(Semaphore::MAX_PERMITS); + + // Wait for both updates to complete successfully. + for boot_disk in [M2Slot::A, M2Slot::B] { + tokio::time::timeout(TEST_TIMEOUT, async { + loop { + let status = writer.status(boot_disk); + match status { + BootDiskOsWriteStatus::InProgress { .. } => { + println!("saw irrelevant status {status:?}"); + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + BootDiskOsWriteStatus::Complete { update_id } => { + match boot_disk { + M2Slot::A => assert_eq!(update_id, update_id_a), + M2Slot::B => assert_eq!(update_id, update_id_b), + } + break; + } + BootDiskOsWriteStatus::Failed { .. } + | BootDiskOsWriteStatus::NoUpdateStarted => { + panic!("unexpected status {status:?}"); + } + } + } + }) + .await + .unwrap(); + } + + // Ensure each "disk" saw the expected contents. 
+ let expected_disks = [ + InMemoryDiskContents { + path: disk_devfs_path_a.into(), + data: data_a, + }, + InMemoryDiskContents { + path: disk_devfs_path_b.into(), + data: data_b, + }, + ]; + let written_disks = + inject_disk_interface.finalized_writes.lock().unwrap(); + assert_eq!(written_disks.len(), expected_disks.len()); + for expected in expected_disks { + assert!( + written_disks.contains(&expected), + "written disks missing expected contents for {}", + expected.path, + ); + } + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn boot_disk_os_writer_rejects_new_updates_while_old_running() { + let logctx = test_setup_log( + "boot_disk_os_writer_rejects_new_updates_while_old_running", + ); + + // generate two small, random "OS image"s consisting of 10 "blocks" each + let num_data_blocks = 10; + let data_len = num_data_blocks * InMemoryDiskInterface::BLOCK_SIZE; + let mut data_a = vec![0; data_len]; + let mut data_b = vec![0; data_len]; + rand::thread_rng().fill_bytes(&mut data_a); + rand::thread_rng().fill_bytes(&mut data_b); + let data_hash_a = Sha3_256::digest(&data_a); + let data_hash_b = Sha3_256::digest(&data_b); + + // generate a disk writer with no semaphore permits so the updates block + // until we get a chance to (try to) start both of them + let inject_disk_interface = + InMemoryDiskInterface::new(Semaphore::new(0)); + let shared_semaphore = Arc::clone(&inject_disk_interface.semaphore); + + let writer = Arc::new(BootDiskOsWriter::new(&logctx.log)); + let disk_devfs_path = "/unit-test/disk"; + let boot_disk = M2Slot::A; + + let update_id_a = Uuid::new_v4(); + let update_id_b = Uuid::new_v4(); + + writer + .start_update_impl( + boot_disk, + disk_devfs_path.into(), + update_id_a, + data_hash_a.into(), + stream::once(future::ready(Ok(Bytes::from(data_a.clone())))), + inject_disk_interface.clone(), + ) + .await + .unwrap(); + + let error = writer + .start_update_impl( + boot_disk, + disk_devfs_path.into(), + update_id_b, + data_hash_b.into(), + stream::once(future::ready(Ok(Bytes::from(data_b.clone())))), + inject_disk_interface.clone(), + ) + .await + .unwrap_err(); + match &*error { + BootDiskOsWriteError::UpdateRunning(running_id) => { + assert_eq!(*running_id, update_id_a); + } + _ => panic!("unexpected error {error}"), + } + + // Both update attempts started; unblock the "disk". + shared_semaphore.add_permits(Semaphore::MAX_PERMITS); + + // Wait for the first update to complete successfully. + tokio::time::timeout(TEST_TIMEOUT, async { + loop { + let status = writer.status(boot_disk); + match status { + BootDiskOsWriteStatus::InProgress { .. } => { + println!("saw irrelevant status {status:?}"); + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + BootDiskOsWriteStatus::Complete { update_id } => { + assert_eq!(update_id, update_id_a); + break; + } + BootDiskOsWriteStatus::Failed { .. } + | BootDiskOsWriteStatus::NoUpdateStarted => { + panic!("unexpected status {status:?}"); + } + } + } + }) + .await + .unwrap(); + + // Ensure we wrote the contents of the first update. 
+ let expected_disks = [InMemoryDiskContents { + path: disk_devfs_path.into(), + data: data_a, + }]; + let written_disks = + inject_disk_interface.finalized_writes.lock().unwrap(); + assert_eq!(*written_disks, expected_disks); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn boot_disk_os_writer_rejects_new_updates_while_old_completed() { + let logctx = test_setup_log( + "boot_disk_os_writer_rejects_new_updates_while_old_completed", + ); + + // generate two small, random "OS image"s consisting of 10 "blocks" each + let num_data_blocks = 10; + let data_len = num_data_blocks * InMemoryDiskInterface::BLOCK_SIZE; + let mut data_a = vec![0; data_len]; + let mut data_b = vec![0; data_len]; + rand::thread_rng().fill_bytes(&mut data_a); + rand::thread_rng().fill_bytes(&mut data_b); + let data_hash_a = Sha3_256::digest(&data_a); + let data_hash_b = Sha3_256::digest(&data_b); + + // generate a disk writer with effectively infinite semaphore permits + let inject_disk_interface = + InMemoryDiskInterface::new(Semaphore::new(Semaphore::MAX_PERMITS)); + + let writer = Arc::new(BootDiskOsWriter::new(&logctx.log)); + let disk_devfs_path = "/unit-test/disk"; + let boot_disk = M2Slot::A; + + let update_id_a = Uuid::new_v4(); + let update_id_b = Uuid::new_v4(); + + writer + .start_update_impl( + boot_disk, + disk_devfs_path.into(), + update_id_a, + data_hash_a.into(), + stream::once(future::ready(Ok(Bytes::from(data_a.clone())))), + inject_disk_interface.clone(), + ) + .await + .unwrap(); + + // Wait for the first update to complete successfully. + tokio::time::timeout(TEST_TIMEOUT, async { + loop { + let status = writer.status(boot_disk); + match status { + BootDiskOsWriteStatus::InProgress { update_id, .. } => { + assert_eq!(update_id, update_id_a); + println!("saw irrelevant status {status:?}"); + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + BootDiskOsWriteStatus::Complete { update_id } => { + assert_eq!(update_id, update_id_a); + break; + } + BootDiskOsWriteStatus::Failed { .. } + | BootDiskOsWriteStatus::NoUpdateStarted => { + panic!("unexpected status {status:?}"); + } + } + } + }) + .await + .unwrap(); + + // Ensure we wrote the contents of the first update. + let expected_disks = [InMemoryDiskContents { + path: disk_devfs_path.into(), + data: data_a, + }]; + { + let mut written_disks = + inject_disk_interface.finalized_writes.lock().unwrap(); + assert_eq!(*written_disks, expected_disks); + written_disks.clear(); + } + + // Check that we get the expected error when attempting to start another + // update to this same disk. + let expected_error = + BootDiskOsWriteError::CannotStartWithoutClearingPreviousStatus( + update_id_a, + ); + let error = writer + .start_update_impl( + boot_disk, + disk_devfs_path.into(), + update_id_b, + data_hash_b.into(), + stream::once(future::ready(Ok(Bytes::from(data_b.clone())))), + inject_disk_interface.clone(), + ) + .await + .unwrap_err(); + assert_eq!(error.to_string(), expected_error.to_string()); + + // We should not be able to clear the status with an incorrect update + // ID. + let expected_error = + BootDiskOsWriteError::WrongUpdateIdClearingStatus(update_id_a); + let error = + writer.clear_terminal_status(boot_disk, update_id_b).unwrap_err(); + assert_eq!(error.to_string(), expected_error.to_string()); + + // We should be able to clear the status with the correct update ID, and + // then start the new one. 
+ writer.clear_terminal_status(boot_disk, update_id_a).unwrap(); + writer + .start_update_impl( + boot_disk, + disk_devfs_path.into(), + update_id_b, + data_hash_b.into(), + stream::once(future::ready(Ok(Bytes::from(data_b.clone())))), + inject_disk_interface.clone(), + ) + .await + .unwrap(); + + // Wait for the second update to complete successfully. + tokio::time::timeout(TEST_TIMEOUT, async { + loop { + let status = writer.status(boot_disk); + match status { + BootDiskOsWriteStatus::InProgress { update_id, .. } => { + assert_eq!(update_id, update_id_b); + println!("saw irrelevant status {status:?}"); + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + } + BootDiskOsWriteStatus::Complete { update_id } => { + assert_eq!(update_id, update_id_b); + break; + } + BootDiskOsWriteStatus::Failed { .. } + | BootDiskOsWriteStatus::NoUpdateStarted => { + panic!("unexpected status {status:?}"); + } + } + } + }) + .await + .unwrap(); + + // Ensure we wrote the contents of the second update. + let expected_disks = [InMemoryDiskContents { + path: disk_devfs_path.into(), + data: data_b, + }]; + { + let mut written_disks = + inject_disk_interface.finalized_writes.lock().unwrap(); + assert_eq!(*written_disks, expected_disks); + written_disks.clear(); + } + + logctx.cleanup_successful(); + } +} diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index a596cf83db..058f343e2a 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -6,6 +6,7 @@ use crate::updates::ConfigUpdates; use camino::{Utf8Path, Utf8PathBuf}; +use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use illumos_utils::dladm::Dladm; use illumos_utils::dladm::FindPhysicalLinkError; @@ -44,6 +45,11 @@ pub struct SoftPortConfig { #[derive(Clone, Debug, Deserialize)] #[serde(deny_unknown_fields)] pub struct Config { + /// Configuration for the sled agent dropshot server + /// + /// If the `bind_address` is set, it will be ignored. The remaining fields + /// will be respected. + pub dropshot: ConfigDropshot, /// Configuration for the sled agent debug log pub log: ConfigLogging, /// The sled's mode of operation (auto detect or force gimlet/scrimlet). 
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs
index 2dcb35b77e..8c8a5f2a03 100644
--- a/sled-agent/src/http_entrypoints.rs
+++ b/sled-agent/src/http_entrypoints.rs
@@ -18,14 +18,17 @@ use crate::sled_agent::Error as SledAgentError;
 use crate::zone_bundle;
 use bootstore::schemes::v0::NetworkConfig;
 use camino::Utf8PathBuf;
+use display_error_chain::DisplayErrorChain;
 use dropshot::{
     endpoint, ApiDescription, FreeformBody, HttpError, HttpResponseCreated,
     HttpResponseDeleted, HttpResponseHeaders, HttpResponseOk,
-    HttpResponseUpdatedNoContent, Path, Query, RequestContext, TypedBody,
+    HttpResponseUpdatedNoContent, Path, Query, RequestContext, StreamingBody,
+    TypedBody,
 };
 use illumos_utils::opte::params::{
     DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost,
 };
+use installinator_common::M2Slot;
 use omicron_common::api::external::Error;
 use omicron_common::api::internal::nexus::{
     DiskRuntimeState, SledInstanceState, UpdateArtifactId,
@@ -36,6 +39,7 @@ use oximeter_producer::collect;
 use oximeter_producer::ProducerIdPathParams;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
+use sled_hardware::DiskVariant;
 use std::collections::BTreeMap;
 use uuid::Uuid;
 
@@ -75,6 +79,9 @@ pub fn api() -> SledApiDescription {
     api.register(write_network_bootstore_config)?;
     api.register(add_sled_to_initialized_rack)?;
     api.register(metrics_collect)?;
+    api.register(host_os_write_start)?;
+    api.register(host_os_write_status_get)?;
+    api.register(host_os_write_status_delete)?;
 
     Ok(())
 }
@@ -755,3 +762,166 @@ async fn metrics_collect(
     let producer_id = path_params.into_inner().producer_id;
     collect(&sa.metrics_registry(), producer_id).await
 }
+
+#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)]
+pub struct BootDiskPathParams {
+    pub boot_disk: M2Slot,
+}
+
+#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)]
+pub struct BootDiskUpdatePathParams {
+    pub boot_disk: M2Slot,
+    pub update_id: Uuid,
+}
+
+#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)]
+pub struct BootDiskWriteStartQueryParams {
+    pub update_id: Uuid,
+    // TODO do we already have sha2-256 hashes of the OS images, and if so
+    // should we use that instead? Another option is to use the external API
+    // `Digest` type, although it predates `serde_human_bytes` so just stores
+    // the hash as a `String`.
+    #[serde(with = "serde_human_bytes::hex_array")]
+    #[schemars(schema_with = "omicron_common::hex_schema::<32>")]
+    pub sha3_256_digest: [u8; 32],
+}
+
+/// Write a new host OS image to the specified boot disk
+#[endpoint {
+    method = POST,
+    path = "/boot-disk/{boot_disk}/os/write",
+}]
+async fn host_os_write_start(
+    request_context: RequestContext<SledAgent>,
+    path_params: Path<BootDiskPathParams>,
+    query_params: Query<BootDiskWriteStartQueryParams>,
+    body: StreamingBody,
+) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+    let sa = request_context.context();
+    let boot_disk = path_params.into_inner().boot_disk;
+
+    // Find our corresponding disk.
+    let maybe_disk_path =
+        sa.storage().get_latest_resources().await.disks().values().find_map(
+            |(disk, _pool)| {
+                // Synthetic disks panic if asked for their `slot()`, so filter
+                // them out first; additionally, filter out any non-M2 disks.
+                if disk.is_synthetic() || disk.variant() != DiskVariant::M2 {
+                    return None;
+                }
+
+                // Convert this M2 disk's slot to an M2Slot, and skip any that
+                // don't match the requested boot_disk.
+                let Ok(slot) = M2Slot::try_from(disk.slot()) else {
+                    return None;
+                };
+                if slot != boot_disk {
+                    return None;
+                }
+
+                let raw_devs_path = true;
+                Some(disk.boot_image_devfs_path(raw_devs_path))
+            },
+        );
+
+    let disk_path = match maybe_disk_path {
+        Some(Ok(path)) => path,
+        Some(Err(err)) => {
+            let message = format!(
+                "failed to find devfs path for {boot_disk:?}: {}",
+                DisplayErrorChain::new(&err)
+            );
+            return Err(HttpError {
+                status_code: http::StatusCode::SERVICE_UNAVAILABLE,
+                error_code: None,
+                external_message: message.clone(),
+                internal_message: message,
+            });
+        }
+        None => {
+            let message = format!("no disk found for slot {boot_disk:?}",);
+            return Err(HttpError {
+                status_code: http::StatusCode::SERVICE_UNAVAILABLE,
+                error_code: None,
+                external_message: message.clone(),
+                internal_message: message,
+            });
+        }
+    };
+
+    let BootDiskWriteStartQueryParams { update_id, sha3_256_digest } =
+        query_params.into_inner();
+    sa.boot_disk_os_writer()
+        .start_update(
+            boot_disk,
+            disk_path,
+            update_id,
+            sha3_256_digest,
+            body.into_stream(),
+        )
+        .await
+        .map_err(|err| HttpError::from(&*err))?;
+    Ok(HttpResponseUpdatedNoContent())
+}
+
+/// Current progress of an OS image being written to disk.
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, Deserialize, JsonSchema, Serialize,
+)]
+#[serde(tag = "state", rename_all = "snake_case")]
+pub enum BootDiskOsWriteProgress {
+    /// The image is still being uploaded.
+    ReceivingUploadedImage { bytes_received: usize },
+    /// The image is being written to disk.
+    WritingImageToDisk { bytes_written: usize },
+    /// The image is being read back from disk for validation.
+    ValidatingWrittenImage { bytes_read: usize },
+}
+
+/// Status of an update to a boot disk OS.
+#[derive(Debug, Clone, Deserialize, JsonSchema, Serialize)]
+#[serde(tag = "status", rename_all = "snake_case")]
+pub enum BootDiskOsWriteStatus {
+    /// No update has been started for this disk, or any previously-started
+    /// update has completed and had its status cleared.
+    NoUpdateStarted,
+    /// An update is currently running.
+    InProgress { update_id: Uuid, progress: BootDiskOsWriteProgress },
+    /// The most recent update completed successfully.
+    Complete { update_id: Uuid },
+    /// The most recent update failed.
+    Failed { update_id: Uuid, message: String },
+}
+
+/// Get the status of writing a new host OS
+#[endpoint {
+    method = GET,
+    path = "/boot-disk/{boot_disk}/os/write/status",
+}]
+async fn host_os_write_status_get(
+    request_context: RequestContext<SledAgent>,
+    path_params: Path<BootDiskPathParams>,
+) -> Result<HttpResponseOk<BootDiskOsWriteStatus>, HttpError> {
+    let sa = request_context.context();
+    let boot_disk = path_params.into_inner().boot_disk;
+    let status = sa.boot_disk_os_writer().status(boot_disk);
+    Ok(HttpResponseOk(status))
+}
+
+/// Clear the status of a completed write of a new host OS
+#[endpoint {
+    method = DELETE,
+    path = "/boot-disk/{boot_disk}/os/write/status/{update_id}",
+}]
+async fn host_os_write_status_delete(
+    request_context: RequestContext<SledAgent>,
+    path_params: Path<BootDiskUpdatePathParams>,
+) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+    let sa = request_context.context();
+    let BootDiskUpdatePathParams { boot_disk, update_id } =
+        path_params.into_inner();
+    sa.boot_disk_os_writer()
+        .clear_terminal_status(boot_disk, update_id)
+        .map_err(|err| HttpError::from(&err))?;
+    Ok(HttpResponseUpdatedNoContent())
+}
diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs
index d77ec7a3c0..527b483ee8 100644
--- a/sled-agent/src/lib.rs
+++ b/sled-agent/src/lib.rs
@@ -18,6 +18,7 @@ pub mod common;
 
 // Modules for the non-simulated sled agent.
mod backing_fs; +mod boot_disk_os_writer; pub mod bootstrap; pub mod config; pub(crate) mod dump_setup; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 903c8dabaa..b93ad0721c 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -70,9 +70,10 @@ impl Server { .await .map_err(|e| e.to_string())?; - let mut dropshot_config = dropshot::ConfigDropshot::default(); - dropshot_config.request_body_max_bytes = 1024 * 1024; - dropshot_config.bind_address = SocketAddr::V6(sled_address); + let dropshot_config = dropshot::ConfigDropshot { + bind_address: SocketAddr::V6(sled_address), + ..config.dropshot + }; let dropshot_log = log.new(o!("component" => "dropshot (SledAgent)")); let http_server = dropshot::HttpServerStarter::new( &dropshot_config, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 57aea61ae9..5f278b7f38 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -4,6 +4,7 @@ //! Sled agent implementation +use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::early_networking::{ EarlyNetworkConfig, EarlyNetworkSetupError, @@ -264,6 +265,9 @@ struct SledAgentInner { // Object handling production of metrics for oximeter. metrics_manager: MetricsManager, + + // Handle to the traffic manager for writing OS updates to our boot disks. + boot_disk_os_writer: BootDiskOsWriter, } impl SledAgentInner { @@ -545,6 +549,7 @@ impl SledAgent { zone_bundler: long_running_task_handles.zone_bundler.clone(), bootstore: long_running_task_handles.bootstore.clone(), metrics_manager, + boot_disk_os_writer: BootDiskOsWriter::new(&parent_log), }), log: log.clone(), }; @@ -1043,6 +1048,14 @@ impl SledAgent { pub fn metrics_registry(&self) -> &ProducerRegistry { self.inner.metrics_manager.registry() } + + pub(crate) fn storage(&self) -> &StorageHandle { + &self.inner.storage + } + + pub(crate) fn boot_disk_os_writer(&self) -> &BootDiskOsWriter { + &self.inner.boot_disk_os_writer + } } async fn register_metric_producer_with_nexus( diff --git a/smf/sled-agent/gimlet-standalone/config.toml b/smf/sled-agent/gimlet-standalone/config.toml index e714504311..4d06895453 100644 --- a/smf/sled-agent/gimlet-standalone/config.toml +++ b/smf/sled-agent/gimlet-standalone/config.toml @@ -41,6 +41,11 @@ swap_device_size_gb = 256 data_links = ["net0", "net1"] +[dropshot] +# Host OS images are just over 800 MiB currently; set this to 2 GiB to give some +# breathing room. +request_body_max_bytes = 2_147_483_648 + [log] level = "info" mode = "file" diff --git a/smf/sled-agent/gimlet/config.toml b/smf/sled-agent/gimlet/config.toml index 442e76b393..666d55f359 100644 --- a/smf/sled-agent/gimlet/config.toml +++ b/smf/sled-agent/gimlet/config.toml @@ -37,6 +37,11 @@ swap_device_size_gb = 256 data_links = ["cxgbe0", "cxgbe1"] +[dropshot] +# Host OS images are just over 800 MiB currently; set this to 2 GiB to give some +# breathing room. +request_body_max_bytes = 2_147_483_648 + [log] level = "info" mode = "file" diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml index 176f4002a5..432652c50b 100644 --- a/smf/sled-agent/non-gimlet/config.toml +++ b/smf/sled-agent/non-gimlet/config.toml @@ -76,6 +76,11 @@ switch_zone_maghemite_links = ["tfportrear0_0"] data_links = ["net0", "net1"] +[dropshot] +# Host OS images are just over 800 MiB currently; set this to 2 GiB to give some +# breathing room. 
+request_body_max_bytes = 2_147_483_648 + [log] level = "info" mode = "file" From 5e9251783215ed7e4b86e1359e287e83889c59cd Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Fri, 8 Dec 2023 11:57:49 -0800 Subject: [PATCH 075/186] move the cockroachdb http console to localhost (#4655) I can't find any use of the HTTP console or any of its endpoints for anything, so let's move it off the underlay network. I have not tested this on hardware myself, but the logs for the deploy job here indicate that the `webui` has in fact moved to 127.0.0.1. I couldn't find a way to completely disable the HTTP console, but I think even if I did I would prefer this, as it still lets us access it for in-situ debugging (although I'm not well-versed enough with zones to understand how you would write an SSH forward to get to it with this change). --- smf/cockroachdb/method_script.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh index ee42ab1891..e5ab4e8eaa 100755 --- a/smf/cockroachdb/method_script.sh +++ b/smf/cockroachdb/method_script.sh @@ -44,6 +44,7 @@ fi args=( '--insecure' '--listen-addr' "[$LISTEN_ADDR]:$LISTEN_PORT" + '--http-addr' '127.0.0.1:8080' '--store' "$DATASTORE" '--join' "$JOIN_ADDRS" ) From 2d93fede364f4dcaca7a62f62f96f2b9bc80e4e5 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 8 Dec 2023 14:21:53 -0800 Subject: [PATCH 076/186] [sled-agent/sled-hardware] remove serial_test dependency (#4656) serial_test is a proc macro which ensures that tests run serially rather than in parallel. However, as documented at https://nexte.st/book/test-groups.html, serial_test doesn't actually work with nextest. So go ahead and remove it as a dependency. The next question is: do we need to replace it with nextest's test groups? It turns out that the answer is "no". The only uses of serial_test in our codebase were to enable testing against a mocked free function (if multiple tests would call that function concurrently, the mocking infrastructure would get confused). However, that is only an issue if multiple tests are run within the same process. Nextest's execution model is process-per-test, which means that this isn't an issue at all. (This is also hinted at by the fact that `serial_test` has effectively been inoperative for months, yet we've had no issues with these tests.) 
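
For reference, if we ever do find ourselves needing serialized execution
under nextest, the replacement would be a test group in
`.config/nextest.toml`. A rough sketch (the group name and filter below are
hypothetical, not something this change adds):

    # .config/nextest.toml -- hypothetical sketch, not part of this change.
    [test-groups]
    # Run at most one test from this group at any given time.
    serial-mocks = { max-threads = 1 }

    [[profile.default.overrides]]
    # Filterset selecting the tests that previously used #[serial_test::serial].
    filter = 'package(omicron-sled-agent) and test(test_ensure_service)'
    test-group = 'serial-mocks'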
--- Cargo.lock | 40 ------------------------- Cargo.toml | 1 - sled-agent/Cargo.toml | 1 - sled-agent/src/services.rs | 8 ----- sled-hardware/Cargo.toml | 1 - sled-hardware/src/illumos/partitions.rs | 1 - 6 files changed, 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 71cca52057..67e1d3784c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1470,19 +1470,6 @@ dependencies = [ "syn 2.0.32", ] -[[package]] -name = "dashmap" -version = "5.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd72493923899c6f10c641bdbdeddc7183d6396641d99c1a0d1597f37f92e28" -dependencies = [ - "cfg-if", - "hashbrown 0.14.2", - "lock_api", - "once_cell", - "parking_lot_core 0.9.8", -] - [[package]] name = "data-encoding" version = "2.4.0" @@ -4916,7 +4903,6 @@ dependencies = [ "serde", "serde_human_bytes", "serde_json", - "serial_test", "sha3", "sled-agent-client", "sled-hardware", @@ -7438,31 +7424,6 @@ dependencies = [ "unsafe-libyaml", ] -[[package]] -name = "serial_test" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c789ec87f4687d022a2405cf46e0cd6284889f1839de292cadeb6c6019506f2" -dependencies = [ - "dashmap", - "futures", - "lazy_static", - "log", - "parking_lot 0.12.1", - "serial_test_derive", -] - -[[package]] -name = "serial_test_derive" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b64f9e531ce97c88b4778aad0ceee079216071cffec6ac9b904277f8f92e7fe3" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "sha1" version = "0.10.6" @@ -7654,7 +7615,6 @@ dependencies = [ "rand 0.8.5", "schemars", "serde", - "serial_test", "slog", "thiserror", "tofino", diff --git a/Cargo.toml b/Cargo.toml index 2bdd8522eb..f8d2a07977 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -322,7 +322,6 @@ serde_path_to_error = "0.1.14" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" serde_with = "3.4.0" -serial_test = "0.10" sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 7607d57b95..3f7fd1c7f2 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -92,7 +92,6 @@ openapi-lint.workspace = true openapiv3.workspace = true pretty_assertions.workspace = true rcgen.workspace = true -serial_test.workspace = true subprocess.workspace = true slog-async.workspace = true slog-term.workspace = true diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 651d2638e0..837c2a05df 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -3884,7 +3884,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_ensure_service() { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); @@ -3916,7 +3915,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_ensure_service_which_already_exists() { let logctx = omicron_test_utils::dev::test_setup_log( "test_ensure_service_which_already_exists", @@ -3944,7 +3942,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_services_are_recreated_on_reboot() { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_are_recreated_on_reboot", @@ -3981,7 +3978,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_services_do_not_persist_without_config() { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_do_not_persist_without_config", @@ -4023,7 +4019,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] 
async fn test_bad_generations() { // Start like the normal tests. let logctx = @@ -4128,7 +4123,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_old_ledger_migration() { let logctx = omicron_test_utils::dev::test_setup_log( "test_old_ledger_migration", @@ -4193,7 +4187,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_old_ledger_migration_continue() { // This test is just like "test_old_ledger_migration", except that we // deploy a new zone after migration and before shutting down the @@ -4271,7 +4264,6 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_old_ledger_migration_bad() { let logctx = omicron_test_utils::dev::test_setup_log( "test_old_ledger_migration_bad", diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 36ba633067..66ecbf9d64 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -31,4 +31,3 @@ libefi-illumos = { git = "https://github.com/oxidecomputer/libefi-illumos", bran [dev-dependencies] illumos-utils = { workspace = true, features = ["testing"] } omicron-test-utils.workspace = true -serial_test.workspace = true diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 4b7e69057d..de62e25cfe 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -207,7 +207,6 @@ mod test { } #[test] - #[serial_test::serial] fn ensure_partition_layout_u2_format_with_dev_path() { let logctx = test_setup_log("ensure_partition_layout_u2_format_with_dev_path"); From 2a3db4154a73761b559f6e2d9bc2640de15d1c3d Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 8 Dec 2023 14:39:57 -0800 Subject: [PATCH 077/186] [nexus] improve external messages and make more available to clients (#4573) While developing #4520, I observed that we were producing a number of error messages that were: * 503 Service Unavailable, * With only an internal message attached * But where the message is both safe and useful to display to clients. This is my attempt to make the situation slightly better. To achieve this, I made a few changes: 1. Make all the client errors carry a new `MessagePair` struct, which consists of an external message. (Along the way, correct the definition of e.g. the `Conflict` variant: it actually is an external message, not an internal one.) 2. Define a new `InsufficientCapacity` variant that consists of both an external and an internal message. This variant resolves to a 507 Insufficient Storage error, and has a more helpful message than just "Service Unavailable". 3. Turn some current 503 errors into client errors so that the message is available externally. Looking for feedback on this approach! 
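
To illustrate the intended call sites, here's a hypothetical example of the
new constructors (the function and its arguments are made up for
illustration; only `Error::insufficient_capacity` itself comes from this
change):

    // Hypothetical caller; `needed`/`available` are illustrative only.
    use omicron_common::api::external::Error;

    fn check_capacity(needed: u64, available: u64) -> Result<(), Error> {
        if needed > available {
            // Becomes a 507 Insufficient Storage response. The first
            // (external) message is safe to show to clients; the second
            // (internal) message stays in logs and internal contexts.
            return Err(Error::insufficient_capacity(
                "not enough capacity for this operation",
                format!("requested {needed} bytes, {available} available"),
            ));
        }
        Ok(())
    }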
--- certificates/src/lib.rs | 16 +- common/src/api/external/error.rs | 222 ++++++++++++++---- common/src/api/external/mod.rs | 13 +- common/src/vlan.rs | 16 +- docs/http-status-codes.adoc | 3 +- nexus/db-model/src/instance_state.rs | 7 + nexus/db-model/src/semver_version.rs | 10 +- nexus/db-queries/src/db/datastore/disk.rs | 14 +- .../src/db/datastore/external_ip.rs | 16 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 15 +- .../src/db/datastore/ipv4_nat_entry.rs | 12 +- nexus/db-queries/src/db/datastore/mod.rs | 4 +- nexus/db-queries/src/db/datastore/project.rs | 14 +- nexus/db-queries/src/db/datastore/saga.rs | 6 +- nexus/db-queries/src/db/datastore/silo.rs | 14 +- nexus/db-queries/src/db/datastore/sled.rs | 6 +- nexus/db-queries/src/db/datastore/vpc.rs | 39 ++- .../db-queries/src/db/queries/external_ip.rs | 28 ++- .../src/db/queries/region_allocation.rs | 10 +- nexus/src/app/address_lot.rs | 19 +- nexus/src/app/device_auth.rs | 4 +- nexus/src/app/disk.rs | 32 +-- nexus/src/app/external_endpoints.rs | 2 +- nexus/src/app/image.rs | 20 +- nexus/src/app/instance.rs | 75 +++--- nexus/src/app/rack.rs | 7 +- nexus/src/app/session.rs | 2 +- nexus/src/app/silo.rs | 38 ++- nexus/src/app/switch_interface.rs | 6 +- nexus/src/app/update/mod.rs | 12 +- nexus/src/app/vpc_router.rs | 23 +- nexus/src/external_api/http_entrypoints.rs | 5 +- nexus/tests/integration_tests/disks.rs | 6 +- nexus/tests/integration_tests/instances.rs | 16 +- .../tests/integration_tests/router_routes.rs | 2 +- nexus/tests/integration_tests/snapshots.rs | 2 +- .../integration_tests/volume_management.rs | 2 +- sled-agent/src/common/disk.rs | 26 +- sled-agent/src/instance.rs | 12 +- sled-agent/src/sim/collection.rs | 7 +- sled-agent/src/sim/instance.rs | 12 +- 41 files changed, 440 insertions(+), 355 deletions(-) diff --git a/certificates/src/lib.rs b/certificates/src/lib.rs index 6bd7fa32de..442a9cfdd5 100644 --- a/certificates/src/lib.rs +++ b/certificates/src/lib.rs @@ -60,14 +60,14 @@ impl From for Error { | InvalidValidationHostname(_) | ErrorValidatingHostname(_) | NoDnsNameMatchingHostname { .. } - | UnsupportedPurpose => Error::InvalidValue { - label: String::from("certificate"), - message: DisplayErrorChain::new(&error).to_string(), - }, - BadPrivateKey(_) => Error::InvalidValue { - label: String::from("private-key"), - message: DisplayErrorChain::new(&error).to_string(), - }, + | UnsupportedPurpose => Error::invalid_value( + "certificate", + DisplayErrorChain::new(&error).to_string(), + ), + BadPrivateKey(_) => Error::invalid_value( + "private-key", + DisplayErrorChain::new(&error).to_string(), + ), Unexpected(_) => Error::InternalError { internal_message: DisplayErrorChain::new(&error).to_string(), }, diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index e508b7ecba..2661db7bb6 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -35,16 +35,16 @@ pub enum Error { ObjectAlreadyExists { type_name: ResourceType, object_name: String }, /// The request was well-formed, but the operation cannot be completed given /// the current state of the system. - #[error("Invalid Request: {message}")] - InvalidRequest { message: String }, + #[error("Invalid Request: {}", .message.display_internal())] + InvalidRequest { message: MessagePair }, /// Authentication credentials were required but either missing or invalid. /// The HTTP status code is called "Unauthorized", but it's more accurate to /// call it "Unauthenticated". 
#[error("Missing or invalid credentials")] Unauthenticated { internal_message: String }, /// The specified input field is not valid. - #[error("Invalid Value: {label}, {message}")] - InvalidValue { label: String, message: String }, + #[error("Invalid Value: {label}, {}", .message.display_internal())] + InvalidValue { label: String, message: MessagePair }, /// The request is not authorized to perform the requested operation. #[error("Forbidden")] Forbidden, @@ -55,15 +55,86 @@ pub enum Error { /// The system (or part of it) is unavailable. #[error("Service Unavailable: {internal_message}")] ServiceUnavailable { internal_message: String }, - /// Method Not Allowed - #[error("Method Not Allowed: {internal_message}")] - MethodNotAllowed { internal_message: String }, + + /// There is insufficient capacity to perform the requested operation. + /// + /// This variant is translated to 507 Insufficient Storage, and it carries + /// both an external and an internal message. The external message is + /// intended for operator consumption and is intended to not leak any + /// implementation details. + #[error("Insufficient Capacity: {}", .message.display_internal())] + InsufficientCapacity { message: MessagePair }, #[error("Type version mismatch! {internal_message}")] TypeVersionMismatch { internal_message: String }, - #[error("Conflict: {internal_message}")] - Conflict { internal_message: String }, + #[error("Conflict: {}", .message.display_internal())] + Conflict { message: MessagePair }, +} + +/// Represents an error message which has an external component, along with +/// some internal context possibly attached to it. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct MessagePair { + external_message: String, + internal_context: String, +} + +impl MessagePair { + pub fn new(external_message: String) -> Self { + Self { external_message, internal_context: String::new() } + } + + pub fn new_full( + external_message: String, + internal_context: String, + ) -> Self { + Self { external_message, internal_context } + } + + pub fn external_message(&self) -> &str { + &self.external_message + } + + pub fn internal_context(&self) -> &str { + &self.internal_context + } + + fn with_internal_context(self, context: C) -> Self + where + C: Display + Send + Sync + 'static, + { + let internal_context = if self.internal_context.is_empty() { + context.to_string() + } else { + format!("{}: {}", context, self.internal_context) + }; + Self { external_message: self.external_message, internal_context } + } + + pub fn into_internal_external(self) -> (String, String) { + let internal = self.display_internal().to_string(); + (internal, self.external_message) + } + + // Do not implement `fmt::Display` for this enum because we don't want users to + // accidentally display the internal message to the client. Instead, use a + // private formatter. + fn display_internal(&self) -> MessagePairDisplayInternal<'_> { + MessagePairDisplayInternal(self) + } +} + +struct MessagePairDisplayInternal<'a>(&'a MessagePair); + +impl<'a> Display for MessagePairDisplayInternal<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0.external_message)?; + if !self.0.internal_context.is_empty() { + write!(f, " (with internal context: {})", self.0.internal_context)?; + } + Ok(()) + } } /// Indicates how an object was looked up (for an `ObjectNotFound` error) @@ -119,7 +190,7 @@ impl Error { | Error::InvalidRequest { .. } | Error::InvalidValue { .. 
            | Error::Forbidden
-           | Error::MethodNotAllowed { .. }
+           | Error::InsufficientCapacity { .. }
            | Error::InternalError { .. }
            | Error::TypeVersionMismatch { .. }
            | Error::Conflict { .. } => false,
@@ -151,8 +222,20 @@ impl Error {
    ///
    /// This should be used for failures due possibly to invalid client input
    /// or malformed requests.
-   pub fn invalid_request(message: &str) -> Error {
-       Error::InvalidRequest { message: message.to_owned() }
+   pub fn invalid_request(message: impl Into<String>) -> Error {
+       Error::InvalidRequest { message: MessagePair::new(message.into()) }
+   }
+
+   /// Generates an [`Error::InvalidValue`] error with the specific label and
+   /// message.
+   pub fn invalid_value(
+       label: impl Into<String>,
+       message: impl Into<String>,
+   ) -> Error {
+       Error::InvalidValue {
+           label: label.into(),
+           message: MessagePair::new(message.into()),
+       }
    }

    /// Generates an [`Error::ServiceUnavailable`] error with the specific
@@ -166,6 +249,27 @@ impl Error {
        Error::ServiceUnavailable { internal_message: message.to_owned() }
    }

+   /// Generates an [`Error::InsufficientCapacity`] error with external
+   /// and internal messages.
+   ///
+   /// This should be used for failures where there is insufficient capacity,
+   /// and where the caller must either take action or wait until capacity is
+   /// freed.
+   ///
+   /// In the future, we may want to provide more help here: e.g. a link to a
+   /// status or support page.
+   pub fn insufficient_capacity(
+       external_message: impl Into<String>,
+       internal_message: impl Into<String>,
+   ) -> Error {
+       Error::InsufficientCapacity {
+           message: MessagePair::new_full(
+               external_message.into(),
+               internal_message.into(),
+           ),
+       }
+   }
+
    /// Generates an [`Error::TypeVersionMismatch`] with a specific message.
    ///
    /// TypeVersionMismatch errors are a specific type of error arising from differences
@@ -186,8 +290,8 @@
    /// retried. The internal message should provide more information about the
    /// source of the conflict and possible actions the caller can take to
    /// resolve it (if any).
-   pub fn conflict(message: &str) -> Error {
-       Error::Conflict { internal_message: message.to_owned() }
+   pub fn conflict(message: impl Into<String>) -> Error {
+       Error::Conflict { message: MessagePair::new(message.into()) }
    }

    /// Given an [`Error`] with an internal message, return the same error with
    ///
@@ -201,9 +305,14 @@
        match self {
            Error::ObjectNotFound { .. }
            | Error::ObjectAlreadyExists { .. }
-           | Error::InvalidRequest { .. }
-           | Error::InvalidValue { .. }
            | Error::Forbidden => self,
+           Error::InvalidRequest { message } => Error::InvalidRequest {
+               message: message.with_internal_context(context),
+           },
+           Error::InvalidValue { label, message } => Error::InvalidValue {
+               label,
+               message: message.with_internal_context(context),
+           },
            Error::Unauthenticated { internal_message } => {
                Error::Unauthenticated {
                    internal_message: format!(
                        "{}: {}",
                        context, internal_message
                    ),
                }
            }
-           Error::MethodNotAllowed { internal_message } => {
-               Error::MethodNotAllowed {
-                   internal_message: format!(
-                       "{}: {}",
-                       context, internal_message
-                   ),
+           Error::InsufficientCapacity { message } => {
+               Error::InsufficientCapacity {
+                   message: message.with_internal_context(context),
                }
            }
            Error::TypeVersionMismatch { internal_message } => {
                Error::TypeVersionMismatch {
                    internal_message: format!(
                        "{}: {}",
                        context, internal_message
                    ),
                }
            }
-           Error::Conflict { internal_message } => Error::Conflict {
-               internal_message: format!("{}: {}", context, internal_message),
+           Error::Conflict { message } => Error::Conflict {
+               message: message.with_internal_context(context),
            },
        }
    }
@@ -292,28 +398,29 @@ impl From<Error> for HttpError {
                    internal_message,
                },

-           Error::InvalidRequest { message } => HttpError::for_bad_request(
-               Some(String::from("InvalidRequest")),
-               message,
-           ),
-
-           Error::InvalidValue { label, message } => {
-               let message =
-                   format!("unsupported value for \"{}\": {}", label, message);
-               HttpError::for_bad_request(
-                   Some(String::from("InvalidValue")),
-                   message,
-               )
+           Error::InvalidRequest { message } => {
+               let (internal_message, external_message) =
+                   message.into_internal_external();
+               HttpError {
+                   status_code: http::StatusCode::BAD_REQUEST,
+                   error_code: Some(String::from("InvalidRequest")),
+                   external_message,
+                   internal_message,
+               }
            }

            // TODO: RFC-7231 requires that 405s generate an Accept header to describe
            // what methods are available in the response
-           Error::MethodNotAllowed { internal_message } => {
-               HttpError::for_client_error(
-                   Some(String::from("MethodNotAllowed")),
-                   http::StatusCode::METHOD_NOT_ALLOWED,
+           Error::InvalidValue { label, message } => {
+               let (internal_message, external_message) =
+                   message.into_internal_external();
+               HttpError {
+                   status_code: http::StatusCode::BAD_REQUEST,
+                   error_code: Some(String::from("InvalidValue")),
+                   external_message: format!(
+                       "unsupported value for \"{}\": {}",
+                       label, external_message
+                   ),
                    internal_message,
-               )
+               }
            }
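A self-contained sketch may help make the `MessagePair` plumbing above concrete: internal context is prepended as an error propagates outward, while the external half never changes. The type below re-implements that behavior standalone for illustration; it is not the crate's own type.

```rust
// Standalone re-implementation of the MessagePair semantics, for illustration.
struct Pair {
    external: String,
    internal: String,
}

impl Pair {
    fn with_context(self, ctx: &str) -> Pair {
        // New context is prepended, so the outermost caller's context reads first.
        let internal = if self.internal.is_empty() {
            ctx.to_string()
        } else {
            format!("{}: {}", ctx, self.internal)
        };
        Pair { external: self.external, internal }
    }

    fn into_internal_external(self) -> (String, String) {
        // The internal message embeds the context; the external one never does.
        let internal = if self.internal.is_empty() {
            self.external.clone()
        } else {
            format!("{} (with internal context: {})", self.external, self.internal)
        };
        (internal, self.external)
    }
}

fn main() {
    let (internal, external) = Pair {
        external: "No external IP addresses available".into(),
        internal: String::new(),
    }
    .with_context("allocating IP for instance")
    .with_context("instance create saga")
    .into_internal_external();

    assert_eq!(external, "No external IP addresses available");
    assert!(internal.ends_with(
        "(with internal context: instance create saga: allocating IP for instance)"
    ));
}
```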
            Error::Forbidden => HttpError::for_client_error(
@@ -333,16 +440,35 @@
                )
            }

+           Error::InsufficientCapacity { message } => {
+               let (internal_message, external_message) =
+                   message.into_internal_external();
+               // Need to construct an `HttpError` explicitly to present both
+               // an internal and an external message.
+               HttpError {
+                   status_code: http::StatusCode::INSUFFICIENT_STORAGE,
+                   error_code: Some(String::from("InsufficientCapacity")),
+                   external_message: format!(
+                       "Insufficient capacity: {}",
+                       external_message
+                   ),
+                   internal_message,
+               }
+           }
+
            Error::TypeVersionMismatch { internal_message } => {
                HttpError::for_internal_error(internal_message)
            }

-           Error::Conflict { internal_message } => {
-               HttpError::for_client_error(
-                   Some(String::from("Conflict")),
-                   http::StatusCode::CONFLICT,
+           Error::Conflict { message } => {
+               let (internal_message, external_message) =
+                   message.into_internal_external();
+               HttpError {
+                   status_code: http::StatusCode::CONFLICT,
+                   error_code: Some(String::from("Conflict")),
+                   external_message,
                    internal_message,
-               )
+               }
            }
        }
    }
diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs
index 50516a5da4..a6d729593b 100644
--- a/common/src/api/external/mod.rs
+++ b/common/src/api/external/mod.rs
@@ -316,10 +316,7 @@ impl Name {
    /// `Name::try_from(String)` that marshals any error into an appropriate
    /// `Error`.
    pub fn from_param(value: String, label: &str) -> Result<Name, Error> {
-       value.parse().map_err(|e| Error::InvalidValue {
-           label: String::from(label),
-           message: e,
-       })
+       value.parse().map_err(|e| Error::invalid_value(label, e))
    }

    /// Return the `&str` representing the actual name.
@@ -2828,10 +2825,10 @@ mod test {
        assert!(result.is_err());
        assert_eq!(
            result,
-           Err(Error::InvalidValue {
-               label: "the_name".to_string(),
-               message: "name requires at least one character".to_string()
-           })
+           Err(Error::invalid_value(
+               "the_name",
+               "name requires at least one character"
+           ))
        );
    }
diff --git a/common/src/vlan.rs b/common/src/vlan.rs
index 45776e09ac..5e5765ffe2 100644
--- a/common/src/vlan.rs
+++ b/common/src/vlan.rs
@@ -20,10 +20,10 @@ impl VlanID {
    /// Creates a new VLAN ID, returning an error if it is out of range.
    pub fn new(id: u16) -> Result<Self, Error> {
        if VLAN_MAX < id {
-           return Err(Error::InvalidValue {
-               label: id.to_string(),
-               message: "Invalid VLAN value".to_string(),
-           });
+           return Err(Error::invalid_value(
+               id.to_string(),
+               "Invalid VLAN value",
+           ));
        }
        Ok(Self(id))
    }
@@ -38,9 +38,9 @@ impl fmt::Display for VlanID {
 impl FromStr for VlanID {
    type Err = Error;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
-       Self::new(s.parse().map_err(|e| Error::InvalidValue {
-           label: s.to_string(),
-           message: format!("{}", e),
-       })?)
+       Self::new(
+           s.parse::<u16>()
+               .map_err(|e| Error::invalid_value(s, e.to_string()))?,
+       )
    }
 }
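As a standalone restatement of the `VlanID` parse-then-validate pattern just above (the 4094 bound is the usual 802.1Q limit and is assumed here rather than taken from this diff):

```rust
use std::str::FromStr;

const VLAN_MAX: u16 = 4094; // assumed 802.1Q upper bound, for illustration

struct VlanID(u16);

impl FromStr for VlanID {
    type Err = String;

    // Parse the number first, then range-check it, funneling both failure
    // modes into one error type, as the diff above does with invalid_value.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let id: u16 = s.parse().map_err(|e| format!("{s}: {e}"))?;
        if id > VLAN_MAX {
            return Err(format!("{id}: Invalid VLAN value"));
        }
        Ok(VlanID(id))
    }
}

fn main() {
    assert!("4094".parse::<VlanID>().is_ok());
    assert!("4095".parse::<VlanID>().is_err());
    assert!("not-a-number".parse::<VlanID>().is_err());
}
```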
+* "503 Service Unavailable" is used when the service (or an internal service on which the service depends) is overloaded or actually unavailable. +* "507 Insufficient Storage" is used if there isn't sufficient capacity available for a particular operation (for example, if there isn't enough disk space available to allocate a new virtual disk). There's more discussion about the 400-level and 500-level codes below. diff --git a/nexus/db-model/src/instance_state.rs b/nexus/db-model/src/instance_state.rs index 6baec7afbd..6b4c71da79 100644 --- a/nexus/db-model/src/instance_state.rs +++ b/nexus/db-model/src/instance_state.rs @@ -6,6 +6,7 @@ use super::impl_enum_wrapper; use omicron_common::api::external; use serde::Deserialize; use serde::Serialize; +use std::fmt; use std::io::Write; impl_enum_wrapper!( @@ -40,6 +41,12 @@ impl InstanceState { } } +impl fmt::Display for InstanceState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + impl From for sled_agent_client::types::InstanceState { fn from(s: InstanceState) -> Self { use external::InstanceState::*; diff --git a/nexus/db-model/src/semver_version.rs b/nexus/db-model/src/semver_version.rs index 966b436149..8e168e11a2 100644 --- a/nexus/db-model/src/semver_version.rs +++ b/nexus/db-model/src/semver_version.rs @@ -68,12 +68,10 @@ fn to_sortable_string(v: semver::Version) -> Result { let max = u64::pow(10, u32::from(PADDED_WIDTH)) - 1; if v.major > max || v.minor > max || v.patch > max { - return Err(external::Error::InvalidValue { - label: "version".to_string(), - message: format!( - "Major, minor, and patch version must be less than {max}" - ), - }); + return Err(external::Error::invalid_value( + "version", + format!("Major, minor, and patch version must be less than {max}"), + )); } let mut result = format!( diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 26d439b350..94d950f86a 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -633,16 +633,12 @@ impl DataStore { // destroyed, don't throw an error. return Ok(disk); } else if !ok_to_delete_states.contains(disk_state.state()) { - return Err(Error::InvalidRequest { - message: format!( - "disk cannot be deleted in state \"{}\"", - disk.runtime_state.disk_state - ), - }); + return Err(Error::invalid_request(format!( + "disk cannot be deleted in state \"{}\"", + disk.runtime_state.disk_state + ))); } else if disk_state.is_attached() { - return Err(Error::InvalidRequest { - message: String::from("disk is attached"), - }); + return Err(Error::invalid_request("disk is attached")); } else { // NOTE: This is a "catch-all" error case, more specific // errors should be preferred as they're more actionable. diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index e821082501..ddf396f871 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -219,9 +219,12 @@ impl DataStore { "Requested external IP address not available", )) } else { - TransactionError::CustomError(Error::invalid_request( - "No external IP addresses available", - )) + TransactionError::CustomError( + Error::insufficient_capacity( + "No external IP addresses available", + "NextExternalIp::new returned NotFound", + ), + ) } } DatabaseError(UniqueViolation, ..) 
if name.is_some() => { @@ -450,10 +453,9 @@ impl DataStore { })?; if updated_rows == 0 { - return Err(Error::InvalidRequest { - message: "deletion failed due to concurrent modification" - .to_string(), - }); + return Err(Error::invalid_request( + "deletion failed due to concurrent modification", + )); } Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index fb300ef833..4497e3f2b4 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -194,11 +194,9 @@ impl DataStore { .optional() .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; if range.is_some() { - return Err(Error::InvalidRequest { - message: - "IP Pool cannot be deleted while it contains IP ranges" - .to_string(), - }); + return Err(Error::invalid_request( + "IP Pool cannot be deleted while it contains IP ranges", + )); } // Delete the pool, conditional on the rcgen not having changed. This @@ -224,10 +222,9 @@ impl DataStore { })?; if updated_rows == 0 { - return Err(Error::InvalidRequest { - message: "deletion failed due to concurrent modification" - .to_string(), - }); + return Err(Error::invalid_request( + "deletion failed due to concurrent modification", + )); } Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs index 1caf5617bb..a44fed4cdf 100644 --- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -124,9 +124,7 @@ impl DataStore { if let Some(nat_entry) = result.first() { Ok(nat_entry.clone()) } else { - Err(Error::InvalidRequest { - message: "no matching records".to_string(), - }) + Err(Error::invalid_request("no matching records")) } } @@ -185,9 +183,7 @@ impl DataStore { if let Some(nat_entry) = result.first() { Ok(nat_entry.clone()) } else { - Err(Error::InvalidRequest { - message: "no matching records".to_string(), - }) + Err(Error::invalid_request("no matching records")) } } @@ -241,9 +237,7 @@ impl DataStore { match latest { Some(value) => Ok(value), - None => Err(Error::InvalidRequest { - message: "sequence table is empty!".to_string(), - }), + None => Err(Error::invalid_request("sequence table is empty!")), } } diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 2844285f40..761c3f995f 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -1046,7 +1046,7 @@ mod test { "Saw error: \'{err}\', but expected \'{expected}\'" ); - assert!(matches!(err, Error::ServiceUnavailable { .. })); + assert!(matches!(err, Error::InsufficientCapacity { .. })); } let _ = db.cleanup().await; @@ -1191,7 +1191,7 @@ mod test { "Saw error: \'{err}\', but expected \'{expected}\'" ); - assert!(matches!(err, Error::ServiceUnavailable { .. })); + assert!(matches!(err, Error::InsufficientCapacity { .. })); let _ = db.cleanup().await; logctx.cleanup_successful(); diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs index ba0c64abfd..a9015ea943 100644 --- a/nexus/db-queries/src/db/datastore/project.rs +++ b/nexus/db-queries/src/db/datastore/project.rs @@ -78,9 +78,9 @@ macro_rules! 
generate_fn_to_ensure_none_in_project { "a" }; - return Err(Error::InvalidRequest { - message: format!("project to be deleted contains {article} {object}: {label}"), - }); + return Err(Error::invalid_request( + format!("project to be deleted contains {article} {object}: {label}") + )); } Ok(()) @@ -271,11 +271,9 @@ impl DataStore { })?; if updated_rows == 0 { - return Err(err.bail(Error::InvalidRequest { - message: - "deletion failed due to concurrent modification" - .to_string(), - })); + return Err(err.bail(Error::invalid_request( + "deletion failed due to concurrent modification", + ))); } self.virtual_provisioning_collection_delete_on_connection( diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index 2ec0c40799..1cd41a9806 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -87,8 +87,8 @@ impl DataStore { match result.status { UpdateStatus::Updated => Ok(()), - UpdateStatus::NotUpdatedButExists => Err(Error::InvalidRequest { - message: format!( + UpdateStatus::NotUpdatedButExists => Err(Error::invalid_request( + format!( "failed to update saga {:?} with state {:?}: preconditions not met: \ expected current_sec = {:?}, adopt_generation = {:?}, \ but found current_sec = {:?}, adopt_generation = {:?}, state = {:?}", @@ -100,7 +100,7 @@ impl DataStore { result.found.adopt_generation, result.found.saga_state, ) - }), + )), } } diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index ab48ec458f..437c171fb0 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -351,9 +351,9 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; if project_found.is_some() { - return Err(Error::InvalidRequest { - message: "silo to be deleted contains a project".to_string(), - }); + return Err(Error::invalid_request( + "silo to be deleted contains a project", + )); } let now = Utc::now(); @@ -375,11 +375,9 @@ impl DataStore { })?; if updated_rows == 0 { - return Err(TxnError::CustomError(Error::InvalidRequest { - message: - "silo deletion failed due to concurrent modification" - .to_string(), - })); + return Err(TxnError::CustomError(Error::invalid_request( + "silo deletion failed due to concurrent modification", + ))); } self.virtual_provisioning_collection_delete_on_connection( diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 023384a9bf..7b94d64418 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -217,8 +217,10 @@ impl DataStore { if let Some(err) = err.take() { match err { SledReservationError::NotFound => { - return external::Error::unavail( + return external::Error::insufficient_capacity( "No sleds can fit the requested instance", + "No sled targets found that had enough \ + capacity to fit the requested instance.", ); } } @@ -399,7 +401,7 @@ mod test { ) .await .unwrap_err(); - assert!(matches!(error, external::Error::ServiceUnavailable { .. })); + assert!(matches!(error, external::Error::InsufficientCapacity { .. })); // Now add a provisionable sled and try again. 
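The sled-reservation change above is a good example of why this patch splits capacity exhaustion out of 503: a 503 is worth retrying after backoff, while a 507 will keep failing until capacity is freed or added. A hypothetical client-side classifier (not part of this patch) might branch like this:

```rust
#[derive(Debug, PartialEq)]
enum RetryDecision {
    RetryAfterBackoff,  // 503: transient overload or unavailability
    NeedsCapacityFreed, // 507: retrying without freeing capacity won't help
    DontRetry,          // other client errors: the request itself is at fault
}

fn classify(status: u16) -> RetryDecision {
    match status {
        503 => RetryDecision::RetryAfterBackoff,
        507 => RetryDecision::NeedsCapacityFreed,
        _ => RetryDecision::DontRetry,
    }
}

fn main() {
    assert_eq!(classify(507), RetryDecision::NeedsCapacityFreed);
    assert_eq!(classify(503), RetryDecision::RetryAfterBackoff);
}
```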
let sled_update = test_new_sled_update(); diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 069ce63028..4f0245e283 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -339,7 +339,10 @@ impl DataStore { opctx.log, "failed to find a VNI after searching entire range"; ); - Err(Error::unavail("Failed to find a free VNI for this VPC")) + Err(Error::insufficient_capacity( + "No free virtual network was found", + "Failed to find a free VNI for this VPC", + )) } // Internal implementation for creating a VPC. @@ -469,11 +472,9 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? .is_some() { - return Err(Error::InvalidRequest { - message: String::from( - "VPC cannot be deleted while VPC Subnets exist", - ), - }); + return Err(Error::invalid_request( + "VPC cannot be deleted while VPC Subnets exist", + )); } // Delete the VPC, conditional on the subnet_gen not having changed. @@ -492,11 +493,9 @@ impl DataStore { ) })?; if updated_rows == 0 { - Err(Error::InvalidRequest { - message: String::from( - "deletion failed to to concurrent modification", - ), - }) + Err(Error::invalid_request( + "deletion failed due to concurrent modification", + )) } else { Ok(()) } @@ -794,12 +793,10 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? .is_some() { - return Err(Error::InvalidRequest { - message: String::from( - "VPC Subnet cannot be deleted while \ - network interfaces in the subnet exist", - ), - }); + return Err(Error::invalid_request( + "VPC Subnet cannot be deleted while network interfaces in the \ + subnet exist", + )); } // Delete the subnet, conditional on the rcgen not having changed. @@ -818,11 +815,9 @@ impl DataStore { ) })?; if updated_rows == 0 { - return Err(Error::InvalidRequest { - message: String::from( - "deletion failed to to concurrent modification", - ), - }); + return Err(Error::invalid_request( + "deletion failed due to concurrent modification", + )); } else { Ok(()) } diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 4e5f59e79c..2a76ea7408 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -997,9 +997,10 @@ mod tests { ); assert_eq!( err, - Error::InvalidRequest { - message: String::from("No external IP addresses available"), - } + Error::insufficient_capacity( + "No external IP addresses available", + "NextExternalIp::new returned NotFound", + ), ); context.success().await; } @@ -1053,9 +1054,10 @@ mod tests { ); assert_eq!( res.unwrap_err(), - Error::InvalidRequest { - message: String::from("No external IP addresses available"), - } + Error::insufficient_capacity( + "No external IP addresses available", + "NextExternalIp::new returned NotFound", + ), ); let res = context @@ -1075,9 +1077,10 @@ mod tests { ); assert_eq!( res.unwrap_err(), - Error::InvalidRequest { - message: String::from("No external IP addresses available"), - } + Error::insufficient_capacity( + "No external IP addresses available", + "NextExternalIp::new returned NotFound", + ), ); context.success().await; } @@ -1306,9 +1309,10 @@ mod tests { .expect_err("Should have failed to allocate after pool exhausted"); assert_eq!( err, - Error::InvalidRequest { - message: String::from("No external IP addresses available"), - } + Error::insufficient_capacity( + "No external IP addresses available", + "NextExternalIp::new returned 
NotFound", + ), ); // But we should be able to allocate another SNat IP diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 031be92c08..3c37bf6b2e 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -46,19 +46,23 @@ pub fn from_diesel(e: DieselError) -> external::Error { NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, ]; if let Some(sentinel) = matches_sentinel(&e, &sentinels) { + let external_message = "Not enough storage"; match sentinel { NOT_ENOUGH_DATASETS_SENTINEL => { - return external::Error::unavail( + return external::Error::insufficient_capacity( + external_message, "Not enough datasets to allocate disks", ); } NOT_ENOUGH_ZPOOL_SPACE_SENTINEL => { - return external::Error::unavail( + return external::Error::insufficient_capacity( + external_message, "Not enough zpool space to allocate disks. There may not be enough disks with space for the requested region. You may also see this if your rack is in a degraded state, or you're running the default multi-rack topology configuration in a 1-sled development environment.", ); } NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL => { - return external::Error::unavail( + return external::Error::insufficient_capacity( + external_message, "Not enough unique zpools selected while allocating disks", ); } diff --git a/nexus/src/app/address_lot.rs b/nexus/src/app/address_lot.rs index b87ae1b09f..847021bdd4 100644 --- a/nexus/src/app/address_lot.rs +++ b/nexus/src/app/address_lot.rs @@ -94,10 +94,9 @@ fn validate_blocks(lot: ¶ms::AddressLotCreate) -> Result<(), Error> { validate_v6_block(first, last)? } _ => { - return Err(Error::InvalidRequest { - message: "Block bounds must be in same address family" - .into(), - }) + return Err(Error::invalid_request( + "Block bounds must be in same address family", + )); } } } @@ -106,18 +105,18 @@ fn validate_blocks(lot: ¶ms::AddressLotCreate) -> Result<(), Error> { fn validate_v4_block(first: &Ipv4Addr, last: &Ipv4Addr) -> Result<(), Error> { if first > last { - return Err(Error::InvalidRequest { - message: "Invalid range, first must be <= last".into(), - }); + return Err(Error::invalid_request( + "Invalid range, first must be <= last", + )); } Ok(()) } fn validate_v6_block(first: &Ipv6Addr, last: &Ipv6Addr) -> Result<(), Error> { if first > last { - return Err(Error::InvalidRequest { - message: "Invalid range, first must be <= last".into(), - }); + return Err(Error::invalid_request( + "Invalid range, first must be <= last", + )); } Ok(()) } diff --git a/nexus/src/app/device_auth.rs b/nexus/src/app/device_auth.rs index c9571ee91f..c70b339a36 100644 --- a/nexus/src/app/device_auth.rs +++ b/nexus/src/app/device_auth.rs @@ -114,9 +114,7 @@ impl super::Nexus { token, ) .await?; - Err(Error::InvalidRequest { - message: "device authorization request expired".to_string(), - }) + Err(Error::invalid_request("device authorization request expired")) } else { self.db_datastore .device_access_token_create( diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 5cfecc9f08..5dd49a2efb 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -140,48 +140,48 @@ impl super::Nexus { // Reject disks where the block size doesn't evenly divide the // total size if (params.size.to_bytes() % block_size) != 0 { - return Err(Error::InvalidValue { - label: String::from("size and block_size"), - message: format!( + return Err(Error::invalid_value( + "size and block_size", + format!( "total size 
                    "total size must be a multiple of block size {}",
                    block_size,
                ),
-           });
+           ));
        }

        // Reject disks where the size isn't at least
        // MIN_DISK_SIZE_BYTES
        if params.size.to_bytes() < MIN_DISK_SIZE_BYTES as u64 {
-           return Err(Error::InvalidValue {
-               label: String::from("size"),
-               message: format!(
+           return Err(Error::invalid_value(
+               "size",
+               format!(
                    "total size must be at least {}",
                    ByteCount::from(MIN_DISK_SIZE_BYTES)
                ),
-           });
+           ));
        }

        // Reject disks where the MIN_DISK_SIZE_BYTES doesn't evenly
        // divide the size
        if (params.size.to_bytes() % MIN_DISK_SIZE_BYTES as u64) != 0 {
-           return Err(Error::InvalidValue {
-               label: String::from("size"),
-               message: format!(
+           return Err(Error::invalid_value(
+               "size",
+               format!(
                    "total size must be a multiple of {}",
                    ByteCount::from(MIN_DISK_SIZE_BYTES)
                ),
-           });
+           ));
        }

        // Reject disks where the size is greater than MAX_DISK_SIZE_BYTES
        if params.size.to_bytes() > MAX_DISK_SIZE_BYTES {
-           return Err(Error::InvalidValue {
-               label: String::from("size"),
-               message: format!(
+           return Err(Error::invalid_value(
+               "size",
+               format!(
                    "total size must be less than {}",
                    ByteCount::try_from(MAX_DISK_SIZE_BYTES).unwrap()
                ),
-           });
+           ));
        }

        Ok(())
diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs
index f95c64d3eb..1ab33c5c9c 100644
--- a/nexus/src/app/external_endpoints.rs
+++ b/nexus/src/app/external_endpoints.rs
@@ -1539,7 +1539,7 @@ mod test {
            Err(Error::InvalidRequest { message }) => {
                assert_eq!(rx_label, "empty");
                assert_eq!(
-                   message,
+                   message.external_message(),
                    format!(
                        "HTTP request for unknown server name {:?}",
                        authority.host()
diff --git a/nexus/src/app/image.rs b/nexus/src/app/image.rs
index 5e78b2a096..a7fe75a464 100644
--- a/nexus/src/app/image.rs
+++ b/nexus/src/app/image.rs
@@ -168,9 +168,11 @@ impl super::Nexus {
                // disk created from this image has to be larger than it.
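The four disk-size checks above compose into a single predicate; the following standalone restatement runs them against sample values (the MIN/MAX constants are placeholders, not Nexus's actual limits):

```rust
// Illustrative constants only; Nexus defines its own MIN/MAX values.
const MIN_DISK_SIZE_BYTES: u64 = 1 << 30; // assume 1 GiB
const MAX_DISK_SIZE_BYTES: u64 = 1023 * (1 << 30); // assumed upper bound

fn validate_disk_size(size: u64, block_size: u64) -> Result<(), String> {
    if size % block_size != 0 {
        return Err(format!("total size must be a multiple of block size {block_size}"));
    }
    if size < MIN_DISK_SIZE_BYTES {
        return Err("total size must be at least the minimum disk size".into());
    }
    if size % MIN_DISK_SIZE_BYTES != 0 {
        return Err("total size must be a multiple of the minimum disk size".into());
    }
    if size > MAX_DISK_SIZE_BYTES {
        return Err("total size must be less than the maximum disk size".into());
    }
    Ok(())
}

fn main() {
    // 4 GiB with 512-byte blocks passes every check.
    assert!(validate_disk_size(4 << 30, 512).is_ok());
    // 1.5 GiB is a multiple of the block size but not of the 1 GiB minimum.
    assert!(validate_disk_size(3 << 29, 512).is_err());
}
```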
let size: u64 = 100 * 1024 * 1024; let size: external::ByteCount = - size.try_into().map_err(|e| Error::InvalidValue { - label: String::from("size"), - message: format!("size is invalid: {}", e), + size.try_into().map_err(|e| { + Error::invalid_value( + "size", + format!("size is invalid: {}", e), + ) })?; let new_image_volume = @@ -293,9 +295,9 @@ impl super::Nexus { ) .await } - ImageLookup::SiloImage(_) => Err(Error::InvalidRequest { - message: "Cannot promote a silo image".to_string(), - }), + ImageLookup::SiloImage(_) => { + Err(Error::invalid_request("Cannot promote a silo image")) + } } } @@ -321,9 +323,9 @@ impl super::Nexus { ) .await } - ImageLookup::ProjectImage(_) => Err(Error::InvalidRequest { - message: "Cannot demote a project image".to_string(), - }), + ImageLookup::ProjectImage(_) => { + Err(Error::invalid_request("Cannot demote a project image")) + } } } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 0edb2c5ea7..987a8ac794 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -211,13 +211,13 @@ impl super::Nexus { // Reject instances where the memory is not at least // MIN_MEMORY_BYTES_PER_INSTANCE if params.memory.to_bytes() < MIN_MEMORY_BYTES_PER_INSTANCE as u64 { - return Err(Error::InvalidValue { - label: String::from("size"), - message: format!( + return Err(Error::invalid_value( + "size", + format!( "memory must be at least {}", ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE) ), - }); + )); } // Reject instances where the memory is not divisible by @@ -225,24 +225,24 @@ impl super::Nexus { if (params.memory.to_bytes() % MIN_MEMORY_BYTES_PER_INSTANCE as u64) != 0 { - return Err(Error::InvalidValue { - label: String::from("size"), - message: format!( + return Err(Error::invalid_value( + "size", + format!( "memory must be divisible by {}", ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE) ), - }); + )); } // Reject instances where the memory is greater than the limit if params.memory.to_bytes() > MAX_MEMORY_BYTES_PER_INSTANCE { - return Err(Error::InvalidValue { - label: String::from("size"), - message: format!( + return Err(Error::invalid_value( + "size", + format!( "memory must be less than or equal to {}", ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE).unwrap() ), - }); + )); } let saga_params = sagas::instance_create::Params { @@ -376,7 +376,7 @@ impl super::Nexus { } if instance.runtime().migration_id.is_some() { - return Err(Error::unavail("instance is already migrating")); + return Err(Error::conflict("instance is already migrating")); } // Kick off the migration saga @@ -785,12 +785,10 @@ impl super::Nexus { if allowed { Ok(InstanceStateChangeRequestAction::SendToSled(sled_id)) } else { - Err(Error::InvalidRequest { - message: format!( - "instance state cannot be changed from state \"{}\"", - effective_state - ), - }) + Err(Error::invalid_request(format!( + "instance state cannot be changed from state \"{}\"", + effective_state + ))) } } @@ -1231,10 +1229,9 @@ impl super::Nexus { // permissions on both) without verifying the shared hierarchy. To // mitigate that we verify that their parent projects have the same ID. 
if authz_project.id() != authz_project_disk.id() { - return Err(Error::InvalidRequest { - message: "disk must be in the same project as the instance" - .to_string(), - }); + return Err(Error::invalid_request( + "disk must be in the same project as the instance", + )); } // TODO(https://github.com/oxidecomputer/omicron/issues/811): @@ -1614,28 +1611,22 @@ impl super::Nexus { | InstanceState::Starting | InstanceState::Stopping | InstanceState::Stopped - | InstanceState::Failed => Err(Error::ServiceUnavailable { - internal_message: format!( - "cannot connect to serial console of instance in state \ - {:?}", - vmm.runtime.state.0 - ), - }), - InstanceState::Destroyed => Err(Error::ServiceUnavailable { - internal_message: format!( - "cannot connect to serial console of instance in state \ - {:?}", - InstanceState::Stopped), - }), + | InstanceState::Failed => { + Err(Error::invalid_request(format!( + "cannot connect to serial console of instance in state \"{}\"", + vmm.runtime.state.0, + ))) + } + InstanceState::Destroyed => Err(Error::invalid_request( + "cannot connect to serial console of destroyed instance", + )), } } else { - Err(Error::ServiceUnavailable { - internal_message: format!( - "instance is in state {:?} and has no active serial console \ + Err(Error::invalid_request(format!( + "instance is {} and has no active serial console \ server", - instance.runtime().nexus_state - ) - }) + instance.runtime().nexus_state + ))) } } diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 3804841feb..1643ac301d 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -211,10 +211,9 @@ impl super::Nexus { }; let rack_network_config = request.rack_network_config.as_ref().ok_or( - Error::InvalidRequest { - message: "cannot initialize a rack without a network config" - .into(), - }, + Error::invalid_request( + "cannot initialize a rack without a network config", + ), )?; self.db_datastore diff --git a/nexus/src/app/session.rs b/nexus/src/app/session.rs index 891124e1ac..7adf1c9bdd 100644 --- a/nexus/src/app/session.rs +++ b/nexus/src/app/session.rs @@ -154,7 +154,7 @@ impl super::Nexus { | Error::Forbidden | Error::InternalError { .. } | Error::ServiceUnavailable { .. } - | Error::MethodNotAllowed { .. } + | Error::InsufficientCapacity { .. } | Error::TypeVersionMismatch { .. } | Error::Conflict { .. } => { Reason::UnknownError { source: error } diff --git a/nexus/src/app/silo.rs b/nexus/src/app/silo.rs index a6ffd8ef5e..f5f3fa00e7 100644 --- a/nexus/src/app/silo.rs +++ b/nexus/src/app/silo.rs @@ -822,25 +822,24 @@ impl super::Nexus { })?; let response = client.get(url).send().await.map_err(|e| { - Error::InvalidValue { - label: String::from("url"), - message: format!("error querying url: {}", e), - } + Error::invalid_value( + "url", + format!("error querying url: {e}"), + ) })?; if !response.status().is_success() { - return Err(Error::InvalidValue { - label: String::from("url"), - message: format!( - "querying url returned: {}", - response.status() - ), - }); + return Err(Error::invalid_value( + "url", + format!("querying url returned: {}", response.status()), + )); } - response.text().await.map_err(|e| Error::InvalidValue { - label: String::from("url"), - message: format!("error getting text from url: {}", e), + response.text().await.map_err(|e| { + Error::invalid_value( + "url", + format!("error getting text from url: {e}"), + ) })? 
} @@ -849,12 +848,11 @@ impl super::Nexus { &base64::engine::general_purpose::STANDARD, data, ) - .map_err(|e| Error::InvalidValue { - label: String::from("data"), - message: format!( - "error getting decoding base64 data: {}", - e - ), + .map_err(|e| { + Error::invalid_value( + "data", + format!("error getting decoding base64 data: {e}"), + ) })?; String::from_utf8_lossy(&bytes).into_owned() } diff --git a/nexus/src/app/switch_interface.rs b/nexus/src/app/switch_interface.rs index cfb0541742..0acb2b7fe7 100644 --- a/nexus/src/app/switch_interface.rs +++ b/nexus/src/app/switch_interface.rs @@ -95,9 +95,9 @@ impl super::Nexus { pub fn validate_switch_location(switch_location: &str) -> Result<(), Error> { if switch_location != "switch0" && switch_location != "switch1" { - return Err(Error::InvalidRequest { - message: "Switch location must be switch0 or switch1".into(), - }); + return Err(Error::invalid_request( + "Switch location must be switch0 or switch1", + )); } Ok(()) } diff --git a/nexus/src/app/update/mod.rs b/nexus/src/app/update/mod.rs index 5075e421ae..36d4dbcb9e 100644 --- a/nexus/src/app/update/mod.rs +++ b/nexus/src/app/update/mod.rs @@ -68,14 +68,10 @@ impl super::Nexus { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; let updates_config = self.updates_config.as_ref().ok_or_else(|| { - Error::InvalidRequest { - message: "updates system not configured".into(), - } + Error::invalid_request("updates system not configured") })?; let base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| { - Error::InvalidRequest { - message: "updates system not configured".into(), - } + Error::invalid_request("updates system not configured") })?; let trusted_root = tokio::fs::read(&updates_config.trusted_root) .await @@ -158,9 +154,7 @@ impl super::Nexus { ) -> Result, Error> { let mut base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| { - Error::InvalidRequest { - message: "updates system not configured".into(), - } + Error::invalid_request("updates system not configured") })?; if !base_url.ends_with('/') { base_url.push('/'); diff --git a/nexus/src/app/vpc_router.rs b/nexus/src/app/vpc_router.rs index 81577f88e8..523a450bbd 100644 --- a/nexus/src/app/vpc_router.rs +++ b/nexus/src/app/vpc_router.rs @@ -129,9 +129,7 @@ impl super::Nexus { // router kind cannot be changed, but it might be able to save us a // database round-trip. if db_router.kind == VpcRouterKind::System { - return Err(Error::MethodNotAllowed { - internal_message: "Cannot delete system router".to_string(), - }); + return Err(Error::invalid_request("Cannot delete system router")); } self.db_datastore.vpc_delete_router(opctx, &authz_router).await } @@ -229,14 +227,12 @@ impl super::Nexus { match db_route.kind.0 { RouterRouteKind::Custom | RouterRouteKind::Default => (), _ => { - return Err(Error::MethodNotAllowed { - internal_message: format!( - "routes of type {} from the system table of VPC {:?} \ + return Err(Error::invalid_request(format!( + "routes of type {} from the system table of VPC {:?} \ are not modifiable", - db_route.kind.0, - vpc.id() - ), - }) + db_route.kind.0, + vpc.id() + ))); } } self.db_datastore @@ -255,10 +251,9 @@ impl super::Nexus { // Only custom routes can be deleted // TODO Shouldn't this constraint be checked by the database query? 
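Restating the decode-and-map pattern from the silo identity-provider code above as a minimal standalone sketch (the function name is hypothetical, and the error text is abbreviated):

```rust
use base64::Engine;
use omicron_common::api::external::Error;

// Hypothetical helper mirroring the silo code above: decode user-supplied
// base64, mapping any failure to an InvalidValue error on the "data" field.
fn decode_idp_metadata(data: &str) -> Result<String, Error> {
    let bytes = base64::engine::general_purpose::STANDARD
        .decode(data)
        .map_err(|e| {
            Error::invalid_value("data", format!("error decoding base64 data: {e}"))
        })?;
    Ok(String::from_utf8_lossy(&bytes).into_owned())
}
```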
if db_route.kind.0 != RouterRouteKind::Custom { - return Err(Error::MethodNotAllowed { - internal_message: "DELETE not allowed on system routes" - .to_string(), - }); + return Err(Error::invalid_request( + "DELETE not allowed on system routes", + )); } self.db_datastore.router_delete_route(opctx, &authz_route).await } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a2e5f633df..a6fd7a3ccb 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5398,10 +5398,7 @@ async fn role_list( WhichPage::First(..) => None, WhichPage::Next(RolePage { last_seen }) => { Some(last_seen.split_once('.').ok_or_else(|| { - Error::InvalidValue { - label: last_seen.clone(), - message: String::from("bad page token"), - } + Error::invalid_value(last_seen.clone(), "bad page token") })?) .map(|(s1, s2)| (s1.to_string(), s2.to_string())) } diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index f7403275b1..807c054b64 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -992,7 +992,7 @@ async fn test_disk_backed_by_multiple_region_sets( .body(Some(&new_disk)) // TODO: this fails! the current allocation algorithm does not split // across datasets - .expect_status(Some(StatusCode::SERVICE_UNAVAILABLE)), + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), ) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -1026,7 +1026,7 @@ async fn test_disk_too_big(cptestctx: &ControlPlaneTestContext) { NexusRequest::new( RequestBuilder::new(client, Method::POST, &disks_url) .body(Some(&new_disk)) - .expect_status(Some(StatusCode::SERVICE_UNAVAILABLE)), + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), ) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -1457,7 +1457,7 @@ async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { NexusRequest::new( RequestBuilder::new(client, Method::POST, &disks_url) .body(Some(&disk_two)) - .expect_status(Some(StatusCode::SERVICE_UNAVAILABLE)), + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), ) .authn_as(AuthnMode::PrivilegedUser) .execute() diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 33d4d15d23..9260006c81 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -3190,7 +3190,7 @@ async fn test_instances_memory_greater_than_max_size( assert!(error.message.contains("memory must be less than")); } -async fn expect_instance_start_fail_unavailable( +async fn expect_instance_start_fail_507( client: &ClientTestContext, instance_name: &str, ) { @@ -3199,13 +3199,15 @@ async fn expect_instance_start_fail_unavailable( http::Method::POST, &get_instance_start_url(instance_name), ) - .expect_status(Some(http::StatusCode::SERVICE_UNAVAILABLE)); + .expect_status(Some(http::StatusCode::INSUFFICIENT_STORAGE)); NexusRequest::new(builder) .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .expect("Expected instance start to fail with SERVICE_UNAVAILABLE"); + .expect( + "Expected instance start to fail with 507 Insufficient Storage", + ); } async fn expect_instance_start_ok( @@ -3296,9 +3298,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( for config in &configs { match config.2 { Ok(_) => expect_instance_start_ok(client, config.0).await, - Err(_) => { - expect_instance_start_fail_unavailable(client, config.0).await - } + Err(_) => 
expect_instance_start_fail_507(client, config.0).await, } } @@ -3404,9 +3404,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( for config in &configs { match config.2 { Ok(_) => expect_instance_start_ok(client, config.0).await, - Err(_) => { - expect_instance_start_fail_unavailable(client, config.0).await - } + Err(_) => expect_instance_start_fail_507(client, config.0).await, } } diff --git a/nexus/tests/integration_tests/router_routes.rs b/nexus/tests/integration_tests/router_routes.rs index 7a7a33d49d..10c594bba9 100644 --- a/nexus/tests/integration_tests/router_routes.rs +++ b/nexus/tests/integration_tests/router_routes.rs @@ -69,7 +69,7 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { // It errors if you try to delete the default route let error: dropshot::HttpErrorResponseBody = NexusRequest::expect_failure( client, - StatusCode::METHOD_NOT_ALLOWED, + StatusCode::BAD_REQUEST, Method::DELETE, get_route_url("system", "default").as_str(), ) diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index a9ed1b7cb7..24b04bf718 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -793,7 +793,7 @@ async fn test_cannot_snapshot_if_no_space(cptestctx: &ControlPlaneTestContext) { }, disk: base_disk_name.into(), })) - .expect_status(Some(StatusCode::SERVICE_UNAVAILABLE)), + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), ) .authn_as(AuthnMode::PrivilegedUser) .execute() diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 5454e1f68f..466cb5472e 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -363,7 +363,7 @@ async fn test_snapshot_prevents_other_disk( NexusRequest::new( RequestBuilder::new(client, Method::POST, &disks_url) .body(Some(&next_disk)) - .expect_status(Some(StatusCode::SERVICE_UNAVAILABLE)), + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), ) .authn_as(AuthnMode::PrivilegedUser) .execute() diff --git a/sled-agent/src/common/disk.rs b/sled-agent/src/common/disk.rs index 57868937d0..54c56825eb 100644 --- a/sled-agent/src/common/disk.rs +++ b/sled-agent/src/common/disk.rs @@ -118,12 +118,10 @@ impl DiskStates { | DiskState::ImportingFromBulkWrites | DiskState::Destroyed | DiskState::Faulted => { - return Err(Error::InvalidRequest { - message: format!( - "cannot detach from {}", - self.current.disk_state - ), - }); + return Err(Error::invalid_request(format!( + "cannot detach from {}", + self.current.disk_state + ))); } }; } @@ -134,9 +132,9 @@ impl DiskStates { // (which is a no-op anyway). 
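For orientation, a condensed standalone sketch of the attach-side legality rules this hunk reworks (UUIDs replaced by integers to keep it dependency-free; states abridged):

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum DiskState {
    Detached,
    Attaching(u32),
    Attached(u32),
    Detaching(u32),
    Destroyed,
    Faulted,
}

// Attach is legal from Detached, idempotent when the same ID is already
// (being) attached, and rejected otherwise, mirroring the match above.
fn can_attach(current: DiskState, id: u32) -> Result<(), String> {
    match current {
        DiskState::Detached => Ok(()),
        DiskState::Attaching(cur) | DiskState::Attached(cur) if cur == id => Ok(()),
        DiskState::Attaching(_) | DiskState::Attached(_) => {
            Err("disk is already attached".into())
        }
        other => Err(format!("cannot attach from {other:?}")),
    }
}

fn main() {
    assert!(can_attach(DiskState::Detached, 1).is_ok());
    assert!(can_attach(DiskState::Attached(1), 1).is_ok());
    assert!(can_attach(DiskState::Attached(2), 1).is_err());
    assert!(can_attach(DiskState::Faulted, 1).is_err());
}
```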
DiskState::Attaching(id) | DiskState::Attached(id) => { if uuid != id { - return Err(Error::InvalidRequest { - message: "disk is already attached".to_string(), - }); + return Err(Error::invalid_request( + "disk is already attached", + )); } return Ok(None); } @@ -157,12 +155,10 @@ impl DiskStates { | DiskState::Detaching(_) | DiskState::Destroyed | DiskState::Faulted => { - return Err(Error::InvalidRequest { - message: format!( - "cannot attach from {}", - self.current.disk_state - ), - }); + return Err(Error::invalid_request(format!( + "cannot attach from {}", + self.current.disk_state + ))); } } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index a811678a48..057402c57a 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -863,13 +863,11 @@ impl Instance { } return Err(Error::Transition( - omicron_common::api::external::Error::Conflict { - internal_message: format!( - "wrong instance state generation: expected {}, got {}", - inner.state.instance().gen, - old_runtime.gen - ), - }, + omicron_common::api::external::Error::conflict(format!( + "wrong instance state generation: expected {}, got {}", + inner.state.instance().gen, + old_runtime.gen + )), )); } diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index bd6ed4aa90..8dae31863c 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -777,7 +777,7 @@ mod test { let error = disk.transition(DiskStateRequested::Attached(id2)).unwrap_err(); if let Error::InvalidRequest { message } = error { - assert_eq!("disk is already attached", message); + assert_eq!("disk is already attached", message.external_message()); } else { panic!("unexpected error type"); } @@ -829,7 +829,10 @@ mod test { let error = disk.transition(DiskStateRequested::Attached(id)).unwrap_err(); if let Error::InvalidRequest { message } = error { - assert_eq!("cannot attach from detaching", message); + assert_eq!( + "cannot attach from detaching", + message.external_message() + ); } else { panic!("unexpected error type"); } diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 15ff83c969..8b00adce60 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -362,13 +362,11 @@ impl SimInstanceInner { } if self.state.instance().gen != old_runtime.gen { - return Err(Error::InvalidRequest { - message: format!( - "wrong Propolis ID generation: expected {}, got {}", - self.state.instance().gen, - old_runtime.gen - ), - }); + return Err(Error::invalid_request(format!( + "wrong Propolis ID generation: expected {}, got {}", + self.state.instance().gen, + old_runtime.gen + ))); } self.state.set_migration_ids(ids, Utc::now()); From ed3671ad082d1b37360a71923c24eb146962930f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 9 Dec 2023 05:41:27 +0000 Subject: [PATCH 078/186] Update Rust crate tokio to 1.35.0 (#4661) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67e1d3784c..8ff57cd451 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8504,9 +8504,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.34.0" +version = "1.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" +checksum = 
"841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c" dependencies = [ "backtrace", "bytes", diff --git a/Cargo.toml b/Cargo.toml index f8d2a07977..0320962452 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -364,7 +364,7 @@ textwrap = "0.16.0" test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.34.0" +tokio = "1.35.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" tokio-tungstenite = "0.20" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index ce65ddf062..88cadda842 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -98,7 +98,7 @@ subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.34.0", features = ["full", "test-util"] } +tokio = { version = "1.35.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } @@ -200,7 +200,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.34.0", features = ["full", "test-util"] } +tokio = { version = "1.35.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } From bad22d463fc6a25f63357f3fffc5d3e156f19163 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 9 Dec 2023 15:48:30 -0800 Subject: [PATCH 079/186] Update Rust crate openapiv3 to 2.0.0 (#4660) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ff57cd451..dd4b206919 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5132,9 +5132,9 @@ dependencies = [ [[package]] name = "openapiv3" -version = "2.0.0-rc.1" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25316406f0191559189c56d99731b63130775de7284d98df5e976ce67882ca8a" +checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" dependencies = [ "indexmap 2.1.0", "serde", diff --git a/Cargo.toml b/Cargo.toml index 0320962452..c7a0c7fd42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -263,7 +263,7 @@ oxide-client = { path = "clients/oxide-client" } oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "24ceba1969269e4d81bda83d8968d7d7f713c46b", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", 
branch = "main" } -openapiv3 = "2.0.0-rc.1" +openapiv3 = "2.0.0" # must match samael's crate! openssl = "0.10" openssl-sys = "0.9" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 88cadda842..3aff947fd3 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -70,7 +70,7 @@ num-bigint = { version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = "0.2.16", features = ["i128", "libm"] } -openapiv3 = { version = "2.0.0-rc.1", default-features = false, features = ["skip_serializing_defaults"] } +openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_serializing_defaults"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -171,7 +171,7 @@ num-bigint = { version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = "0.2.16", features = ["i128", "libm"] } -openapiv3 = { version = "2.0.0-rc.1", default-features = false, features = ["skip_serializing_defaults"] } +openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_serializing_defaults"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } From 0b0f007626c270850f3b715b8bee8bbbee67c951 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 10 Dec 2023 05:45:48 +0000 Subject: [PATCH 080/186] Update taiki-e/install-action digest to 6ee6c3a (#4664) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`d140130` -> `6ee6c3a`](https://togithub.com/taiki-e/install-action/compare/d140130...6ee6c3a) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 0d1aec4c16..b16f1ca9d7 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@d140130aeedb5a946a5769684d32e3a33539f226 # v2 + uses: taiki-e/install-action@6ee6c3ab83eab434138dfa928d72abc7eae14793 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From fe1bed84ff4965a66c345d17a20fdfb371594e3b Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Sun, 10 Dec 2023 13:50:50 -0800 Subject: [PATCH 081/186] Update propolis and crucible versions (#4658) Crucible Start queue backpressure earlier (#1047) Propolis Fix no-deps option for clippy xtask nvme: don't fail on abort cmd (#581) Update openssl and rustix deps Add xtask for pre-push checks Do not require casting for API version cmp better softnpu management command reliability (#570) Log when pause futures complete (#575) Co-authored-by: Alan Hanson --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 12 ++++++------ package-manifest.toml | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd4b206919..7f966651a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -452,7 +452,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" +source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" dependencies = [ "bhyve_api_sys", "libc", @@ -462,7 +462,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" +source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" dependencies = [ "libc", "strum", @@ -1275,7 +1275,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=945f040d259ca8013d3fb26f510453da7cd7b1a6#945f040d259ca8013d3fb26f510453da7cd7b1a6" +source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" dependencies = [ "anyhow", "chrono", @@ -1291,7 +1291,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=945f040d259ca8013d3fb26f510453da7cd7b1a6#945f040d259ca8013d3fb26f510453da7cd7b1a6" +source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" dependencies = [ "anyhow", "chrono", @@ -1308,7 +1308,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=945f040d259ca8013d3fb26f510453da7cd7b1a6#945f040d259ca8013d3fb26f510453da7cd7b1a6" +source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" 
dependencies = [ "crucible-workspace-hack", "libc", @@ -6161,7 +6161,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" +source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" dependencies = [ "async-trait", "base64", @@ -6182,7 +6182,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" +source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" dependencies = [ "anyhow", "atty", @@ -6212,7 +6212,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=3e1d129151c3621d28ead5c6e5760693ba6e7fec#3e1d129151c3621d28ead5c6e5760693ba6e7fec" +source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index c7a0c7fd42..5591dcebc9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -171,9 +171,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "945f040d259ca8013d3fb26f510453da7cd7b1a6" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "945f040d259ca8013d3fb26f510453da7cd7b1a6" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "945f040d259ca8013d3fb26f510453da7cd7b1a6" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } curve25519-dalek = "4" datatest-stable = "0.2.3" display-error-chain = "0.2.0" @@ -292,9 +292,9 @@ pretty-hex = "0.4.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index bd60fe9e93..8516a50e65 
100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -384,10 +384,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "945f040d259ca8013d3fb26f510453da7cd7b1a6" +source.commit = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "f8c23cbf89fd0bbd928d8e3db1357bbea6e6b50560e221f873da5b56ed9d7527" +source.sha256 = "850b468c308cf63ef9e10addee36a923a91b7ab64af0fa0836130c830fb42863" output.type = "zone" [package.crucible-pantry] @@ -395,10 +395,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "945f040d259ca8013d3fb26f510453da7cd7b1a6" +source.commit = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "a25b31c81798eb65564dbe259858fdd9715784d212d3508791b1ef0cf6d17da6" +source.sha256 = "893f845caa5d9b146137b503e80d5615cbd6e9d393745e81e772b10a9072b58b" output.type = "zone" # Refer to @@ -409,10 +409,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" +source.commit = "f1571ce141421cff3d3328f43e7722f5df96fdda" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "cd341409eb2ffc3d8bec89fd20cad61d170f89d3adf926f6104eb01f4f4da881" +source.sha256 = "6e2607f103419a6338936434f3e67afb7cbe14d6397f2d01982ba94b8d0182a9" output.type = "zone" [package.mg-ddm-gz] From 4ad732573f83f19ffb8f9cc1b3f26b1fc0ebd8c5 Mon Sep 17 00:00:00 2001 From: Augustus Mayo Date: Mon, 11 Dec 2023 09:11:16 -0600 Subject: [PATCH 082/186] Fix paths for reflector updates (#4645) Fixes the paths that maghemite reflector updates should look at when detecting changes. This should fix the missing commit id in the PR titles and bodies Co-authored-by: reflector[bot] <123+reflector[bot]@users.noreply.github.com> --- .github/workflows/update-maghemite.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-maghemite.yml b/.github/workflows/update-maghemite.yml index b3611f9987..7ced0adf5e 100644 --- a/.github/workflows/update-maghemite.yml +++ b/.github/workflows/update-maghemite.yml @@ -47,7 +47,7 @@ jobs: - name: Extract new maghemite package version run: | - eval $(cat tools/maghemite_openapi_version | grep COMMIT) + eval $(cat tools/maghemite_mg_openapi_version | grep COMMIT) echo "version=${COMMIT:0:7}" >> $GITHUB_OUTPUT id: updated @@ -55,7 +55,7 @@ jobs: run: | . 
./tools/reflector/helpers.sh - PATHS=("tools/maghemite_openapi_version") + PATHS=("tools/maghemite_ddm_openapi_version" "tools/maghemite_mg_openapi_version" "tools/maghemite_mgd_checksums") CHANGES=() commit $TARGET_BRANCH $INT_BRANCH ${{ inputs.reflector_user_id }} PATHS CHANGES From 0c5c559745843996d68fa01c406645248621e45d Mon Sep 17 00:00:00 2001 From: bnaecker Date: Mon, 11 Dec 2023 09:52:36 -0800 Subject: [PATCH 083/186] Add functions to catch timeseries schema changes (#4602) - Move schema types from the `oximeter-db` crate to `oximeter` proper - Add a `SchemaSet` for storing a unique set of timeseries schema, and comparing against a set deserialized from file. This works like expectorate, with a few tweaks, and allows developers to catch any changes. If we want to relax this to catching _compatible_ changes, for some definition of that, we can do that pretty easily. --- Cargo.lock | 5 +- openapi/nexus-internal.json | 14 + openapi/sled-agent.json | 14 + oximeter/collector/src/self_stats.rs | 53 ++ .../tests/output/self-stat-schema.json | 91 +++ oximeter/db/src/client.rs | 4 +- oximeter/db/src/lib.rs | 289 +------- oximeter/db/src/model.rs | 83 +-- oximeter/db/src/query.rs | 44 +- oximeter/oximeter/Cargo.toml | 2 + oximeter/oximeter/src/lib.rs | 4 + oximeter/oximeter/src/schema.rs | 640 ++++++++++++++++++ oximeter/oximeter/src/types.rs | 2 + workspace-hack/Cargo.toml | 4 +- 14 files changed, 881 insertions(+), 368 deletions(-) create mode 100644 oximeter/collector/tests/output/self-stat-schema.json create mode 100644 oximeter/oximeter/src/schema.rs diff --git a/Cargo.lock b/Cargo.lock index 7f966651a5..c379dcfbff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5310,6 +5310,7 @@ dependencies = [ "omicron-common", "omicron-workspace-hack", "oximeter-macro-impl", + "regex", "rstest", "schemars", "serde", @@ -7528,9 +7529,9 @@ dependencies = [ [[package]] name = "similar" -version = "2.2.1" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" +checksum = "2aeaf503862c419d66959f5d7ca015337d864e9c49485d771b732e2a20453597" dependencies = [ "bstr 0.2.17", "unicode-segmentation", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index caf1414f53..f909710ab4 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4231,6 +4231,20 @@ "content", "type" ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "invalid_timeseries_name" + ] + } + }, + "required": [ + "type" + ] } ] }, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index f809cfa57b..d71f8de644 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5209,6 +5209,20 @@ "content", "type" ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "invalid_timeseries_name" + ] + } + }, + "required": [ + "type" + ] } ] }, diff --git a/oximeter/collector/src/self_stats.rs b/oximeter/collector/src/self_stats.rs index dd1701203e..8d39e6e282 100644 --- a/oximeter/collector/src/self_stats.rs +++ b/oximeter/collector/src/self_stats.rs @@ -154,8 +154,15 @@ impl CollectionTaskStats { #[cfg(test)] mod tests { + use super::Collections; + use super::Cumulative; + use super::FailedCollections; use super::FailureReason; + use super::OximeterCollector; use super::StatusCode; + use oximeter::schema::SchemaSet; + use std::net::IpAddr; + use std::net::Ipv6Addr; #[test] fn 
test_failure_reason_serialization() { @@ -168,4 +175,50 @@ mod tests { assert_eq!(variant.to_string(), *as_str); } } + + const fn collector() -> OximeterCollector { + OximeterCollector { + collector_id: uuid::uuid!("cfebaa5f-3ba9-4bb5-9145-648d287df78a"), + collector_ip: IpAddr::V6(Ipv6Addr::LOCALHOST), + collector_port: 12345, + } + } + + fn collections() -> Collections { + Collections { + producer_id: uuid::uuid!("718452ab-7cca-42f6-b8b1-1aaaa1b09104"), + producer_ip: IpAddr::V6(Ipv6Addr::LOCALHOST), + producer_port: 12345, + base_route: String::from("/"), + datum: Cumulative::new(0), + } + } + + fn failed_collections() -> FailedCollections { + FailedCollections { + producer_id: uuid::uuid!("718452ab-7cca-42f6-b8b1-1aaaa1b09104"), + producer_ip: IpAddr::V6(Ipv6Addr::LOCALHOST), + producer_port: 12345, + base_route: String::from("/"), + reason: FailureReason::Unreachable.to_string(), + datum: Cumulative::new(0), + } + } + + // Check that the self-stat timeseries schema have not changed. + #[test] + fn test_no_schema_changes() { + let collector = collector(); + let collections = collections(); + let failed = failed_collections(); + let mut set = SchemaSet::default(); + assert!(set.insert_checked(&collector, &collections).is_none()); + assert!(set.insert_checked(&collector, &failed).is_none()); + + const PATH: &'static str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/output/self-stat-schema.json" + ); + set.assert_contents(PATH); + } } diff --git a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json new file mode 100644 index 0000000000..0caf2d27e9 --- /dev/null +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -0,0 +1,91 @@ +{ + "oximeter_collector:collections": { + "timeseries_name": "oximeter_collector:collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + "source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2023-12-04T17:49:47.797495948Z" + }, + "oximeter_collector:failed_collections": { + "timeseries_name": "oximeter_collector:failed_collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + "source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + }, + { + "name": "reason", + "field_type": "string", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2023-12-04T17:49:47.799970009Z" + } +} \ No newline at end of file diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index c8a7db20cb..d295d0dcdf 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs 
@@ -710,7 +710,7 @@ impl Client { &self, sample: &Sample, ) -> Result, Error> { - let sample_schema = model::schema_for(sample); + let sample_schema = TimeseriesSchema::from(sample); let name = sample_schema.timeseries_name.clone(); let mut schema = self.schema.lock().await; @@ -1873,7 +1873,7 @@ mod tests { client.insert_samples(&[sample.clone()]).await.unwrap(); // The internal map should now contain both the new timeseries schema - let actual_schema = model::schema_for(&sample); + let actual_schema = TimeseriesSchema::from(&sample); let timeseries_name = TimeseriesName::try_from(sample.timeseries_name.as_str()).unwrap(); let expected_schema = client diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 425c5189ee..9029319048 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -7,13 +7,23 @@ // Copyright 2023 Oxide Computer Company use crate::query::StringFieldSelector; -use chrono::{DateTime, Utc}; -use dropshot::{EmptyScanParams, PaginationParams}; -pub use oximeter::{DatumType, Field, FieldType, Measurement, Sample}; +use chrono::DateTime; +use chrono::Utc; +use dropshot::EmptyScanParams; +use dropshot::PaginationParams; +pub use oximeter::schema::FieldSchema; +pub use oximeter::schema::FieldSource; +pub use oximeter::schema::TimeseriesName; +pub use oximeter::schema::TimeseriesSchema; +pub use oximeter::DatumType; +pub use oximeter::Field; +pub use oximeter::FieldType; +pub use oximeter::Measurement; +pub use oximeter::Sample; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; +use serde::Serialize; use std::collections::BTreeMap; -use std::collections::BTreeSet; use std::convert::TryFrom; use std::io; use std::num::NonZeroU32; @@ -23,7 +33,8 @@ use thiserror::Error; mod client; pub mod model; pub mod query; -pub use client::{Client, DbWrite}; +pub use client::Client; +pub use client::DbWrite; pub use model::OXIMETER_VERSION; @@ -78,9 +89,6 @@ pub enum Error { #[error("The field comparison {op} is not valid for the type {ty}")] InvalidFieldCmp { op: String, ty: FieldType }, - #[error("Invalid timeseries name")] - InvalidTimeseriesName, - #[error("Query must resolve to a single timeseries if limit is specified")] InvalidLimitQuery, @@ -117,136 +125,6 @@ pub enum Error { NonSequentialSchemaVersions, } -/// A timeseries name. -/// -/// Timeseries are named by concatenating the names of their target and metric, joined with a -/// colon. -#[derive( - Debug, Clone, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize, -)] -#[serde(try_from = "&str")] -pub struct TimeseriesName(String); - -impl JsonSchema for TimeseriesName { - fn schema_name() -> String { - "TimeseriesName".to_string() - } - - fn json_schema( - _: &mut schemars::gen::SchemaGenerator, - ) -> schemars::schema::Schema { - schemars::schema::SchemaObject { - metadata: Some(Box::new(schemars::schema::Metadata { - title: Some("The name of a timeseries".to_string()), - description: Some( - "Names are constructed by concatenating the target \ - and metric names with ':'. Target and metric \ - names must be lowercase alphanumeric characters \ - with '_' separating words." 
- .to_string(), - ), - ..Default::default() - })), - instance_type: Some(schemars::schema::InstanceType::String.into()), - string: Some(Box::new(schemars::schema::StringValidation { - pattern: Some(TIMESERIES_NAME_REGEX.to_string()), - ..Default::default() - })), - ..Default::default() - } - .into() - } -} - -impl std::ops::Deref for TimeseriesName { - type Target = String; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl std::fmt::Display for TimeseriesName { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -impl std::convert::TryFrom<&str> for TimeseriesName { - type Error = Error; - fn try_from(s: &str) -> Result { - validate_timeseries_name(s).map(|s| TimeseriesName(s.to_string())) - } -} - -impl std::convert::TryFrom for TimeseriesName { - type Error = Error; - fn try_from(s: String) -> Result { - validate_timeseries_name(&s)?; - Ok(TimeseriesName(s)) - } -} - -impl std::str::FromStr for TimeseriesName { - type Err = Error; - fn from_str(s: &str) -> Result { - s.try_into() - } -} - -impl PartialEq for TimeseriesName -where - T: AsRef, -{ - fn eq(&self, other: &T) -> bool { - self.0.eq(other.as_ref()) - } -} - -fn validate_timeseries_name(s: &str) -> Result<&str, Error> { - if regex::Regex::new(TIMESERIES_NAME_REGEX).unwrap().is_match(s) { - Ok(s) - } else { - Err(Error::InvalidTimeseriesName) - } -} - -/// The schema for a timeseries. -/// -/// This includes the name of the timeseries, as well as the datum type of its metric and the -/// schema for each field. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct TimeseriesSchema { - pub timeseries_name: TimeseriesName, - pub field_schema: BTreeSet, - pub datum_type: DatumType, - pub created: DateTime, -} - -impl TimeseriesSchema { - /// Return the schema for the given field. - pub fn field_schema(&self, name: S) -> Option<&FieldSchema> - where - S: AsRef, - { - self.field_schema.iter().find(|field| field.name == name.as_ref()) - } - - /// Return the target and metric component names for this timeseries - pub fn component_names(&self) -> (&str, &str) { - self.timeseries_name - .split_once(':') - .expect("Incorrectly formatted timseries name") - } -} - -impl PartialEq for TimeseriesSchema { - fn eq(&self, other: &TimeseriesSchema) -> bool { - self.timeseries_name == other.timeseries_name - && self.datum_type == other.datum_type - && self.field_schema == other.field_schema - } -} - impl From for TimeseriesSchema { fn from(schema: model::DbTimeseriesSchema) -> TimeseriesSchema { TimeseriesSchema { @@ -285,25 +163,6 @@ pub struct Timeseries { pub measurements: Vec, } -/// The source from which a field is derived, the target or metric. -#[derive( - Clone, - Copy, - Debug, - PartialEq, - Eq, - PartialOrd, - Ord, - Deserialize, - Serialize, - JsonSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum FieldSource { - Target, - Metric, -} - #[derive( Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, )] @@ -329,24 +188,6 @@ impl From for DbFieldSource { } } -/// The name and type information for a field of a timeseries schema. -#[derive( - Clone, - Debug, - PartialEq, - Eq, - PartialOrd, - Ord, - Deserialize, - Serialize, - JsonSchema, -)] -pub struct FieldSchema { - pub name: String, - pub ty: FieldType, - pub source: FieldSource, -} - /// Type used to paginate request to list timeseries schema. 
pub type TimeseriesSchemaPaginationParams = PaginationParams; @@ -422,19 +263,6 @@ const DATABASE_NAME: &str = "oximeter"; // See https://clickhouse.com/docs/en/interfaces/formats/#jsoneachrow for details. const DATABASE_SELECT_FORMAT: &str = "JSONEachRow"; -// Regular expression describing valid timeseries names. -// -// Names are derived from the names of the Rust structs for the target and metric, converted to -// snake case. So the names must be valid identifiers, and generally: -// -// - Start with lowercase a-z -// - Any number of alphanumerics -// - Zero or more of the above, delimited by '-'. -// -// That describes the target/metric name, and the timeseries is two of those, joined with ':'. -const TIMESERIES_NAME_REGEX: &str = - "(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*):(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*)"; - #[cfg(test)] mod tests { use super::*; @@ -548,71 +376,16 @@ mod tests { ); } - // Test that we correctly order field across a target and metric. - // - // In an earlier commit, we switched from storing fields in an unordered Vec - // to using a BTree{Map,Set} to ensure ordering by name. However, the - // `TimeseriesSchema` type stored all its fields by chaining the sorted - // fields from the target and metric, without then sorting _across_ them. - // - // This was exacerbated by the error reporting, where we did in fact sort - // all fields across the target and metric, making it difficult to tell how - // the derived schema was different, if at all. - // - // This test generates a sample with a schema where the target and metric - // fields are sorted within them, but not across them. We check that the - // derived schema are actually equal, which means we've imposed that - // ordering when deriving the schema. - #[test] - fn test_schema_field_ordering_across_target_metric() { - let target_field = FieldSchema { - name: String::from("later"), - ty: FieldType::U64, - source: FieldSource::Target, - }; - let metric_field = FieldSchema { - name: String::from("earlier"), - ty: FieldType::U64, - source: FieldSource::Metric, - }; - let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); - let datum_type = DatumType::U64; - let field_schema = - [target_field.clone(), metric_field.clone()].into_iter().collect(); - let expected_schema = TimeseriesSchema { - timeseries_name, - field_schema, - datum_type, - created: Utc::now(), - }; - - #[derive(oximeter::Target)] - struct Foo { - later: u64, - } - #[derive(oximeter::Metric)] - struct Bar { - earlier: u64, - datum: u64, - } - - let target = Foo { later: 1 }; - let metric = Bar { earlier: 2, datum: 10 }; - let sample = Sample::new(&target, &metric).unwrap(); - let derived_schema = model::schema_for(&sample); - assert_eq!(derived_schema, expected_schema); - } - #[test] fn test_unsorted_db_fields_are_sorted_on_read() { let target_field = FieldSchema { name: String::from("later"), - ty: FieldType::U64, + field_type: FieldType::U64, source: FieldSource::Target, }; let metric_field = FieldSchema { name: String::from("earlier"), - ty: FieldType::U64, + field_type: FieldType::U64, source: FieldSource::Metric, }; let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); @@ -632,7 +405,10 @@ mod tests { // the extracted model type. 
        let db_fields = DbFieldList {
             names: vec![target_field.name.clone(), metric_field.name.clone()],
-            types: vec![target_field.ty.into(), metric_field.ty.into()],
+            types: vec![
+                target_field.field_type.into(),
+                metric_field.field_type.into(),
+            ],
             sources: vec![
                 target_field.source.into(),
                 metric_field.source.into(),
             ],
@@ -646,23 +422,4 @@ mod tests {
         };
         assert_eq!(expected_schema, TimeseriesSchema::from(db_schema));
     }
-
-    #[test]
-    fn test_field_schema_ordering() {
-        let mut fields = BTreeSet::new();
-        fields.insert(FieldSchema {
-            name: String::from("second"),
-            ty: FieldType::U64,
-            source: FieldSource::Target,
-        });
-        fields.insert(FieldSchema {
-            name: String::from("first"),
-            ty: FieldType::U64,
-            source: FieldSource::Target,
-        });
-        let mut iter = fields.iter();
-        assert_eq!(iter.next().unwrap().name, "first");
-        assert_eq!(iter.next().unwrap().name, "second");
-        assert!(iter.next().is_none());
-    }
 }
diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs
index d92e646e89..b1b45eabc4 100644
--- a/oximeter/db/src/model.rs
+++ b/oximeter/db/src/model.rs
@@ -12,7 +12,6 @@ use crate::FieldSource;
 use crate::Metric;
 use crate::Target;
 use crate::TimeseriesKey;
-use crate::TimeseriesName;
 use crate::TimeseriesSchema;
 use bytes::Bytes;
 use chrono::DateTime;
@@ -118,7 +117,7 @@ impl From<DbFieldList> for BTreeSet<FieldSchema> {
             .zip(list.sources)
             .map(|((name, ty), source)| FieldSchema {
                 name,
-                ty: ty.into(),
+                field_type: ty.into(),
                 source: source.into(),
             })
             .collect()
@@ -131,8 +130,8 @@ impl From<BTreeSet<FieldSchema>> for DbFieldList {
         let mut types = Vec::with_capacity(list.len());
         let mut sources = Vec::with_capacity(list.len());
         for field in list.into_iter() {
-            names.push(field.name);
-            types.push(field.ty.into());
+            names.push(field.name.to_string());
+            types.push(field.field_type.into());
             sources.push(field.source.into());
         }
         DbFieldList { names, types, sources }
@@ -1233,70 +1232,6 @@ pub(crate) fn unroll_measurement_row_impl(
     }
 }
 
-/// Return the schema for a `Sample`.
-pub(crate) fn schema_for(sample: &Sample) -> TimeseriesSchema {
-    // The fields are iterated through whatever order the `Target` or `Metric`
-    // impl chooses. We'll store in a set ordered by field name, to ignore the
-    // declaration order.
- let created = Utc::now(); - let field_schema = sample - .target_fields() - .map(|field| FieldSchema { - name: field.name.clone(), - ty: field.value.field_type(), - source: FieldSource::Target, - }) - .chain(sample.metric_fields().map(|field| FieldSchema { - name: field.name.clone(), - ty: field.value.field_type(), - source: FieldSource::Metric, - })) - .collect(); - TimeseriesSchema { - timeseries_name: TimeseriesName::try_from( - sample.timeseries_name.as_str(), - ) - .expect("Failed to parse timeseries name"), - field_schema, - datum_type: sample.measurement.datum_type(), - created, - } -} - -/// Return the schema for a `Target` and `Metric` -pub(crate) fn schema_for_parts(target: &T, metric: &M) -> TimeseriesSchema -where - T: traits::Target, - M: traits::Metric, -{ - let make_field_schema = |name: &str, - value: FieldValue, - source: FieldSource| { - FieldSchema { name: name.to_string(), ty: value.field_type(), source } - }; - let target_field_schema = - target.field_names().iter().zip(target.field_values()); - let metric_field_schema = - metric.field_names().iter().zip(metric.field_values()); - let field_schema = target_field_schema - .map(|(name, value)| { - make_field_schema(name, value, FieldSource::Target) - }) - .chain(metric_field_schema.map(|(name, value)| { - make_field_schema(name, value, FieldSource::Metric) - })) - .collect(); - TimeseriesSchema { - timeseries_name: TimeseriesName::try_from(oximeter::timeseries_name( - target, metric, - )) - .expect("Failed to parse timeseries name"), - field_schema, - datum_type: metric.datum_type(), - created: Utc::now(), - } -} - // A scalar timestamped sample from a gauge timeseries, as extracted from a query to the database. #[derive(Debug, Clone, Deserialize)] struct DbTimeseriesScalarGaugeSample { @@ -1669,11 +1604,10 @@ pub(crate) fn parse_field_select_row( "Expected pairs of (field_name, field_value) from the field query" ); let (target_name, metric_name) = schema.component_names(); - let mut n_fields = 0; let mut target_fields = Vec::new(); let mut metric_fields = Vec::new(); let mut actual_fields = row.fields.values(); - while n_fields < schema.field_schema.len() { + for _ in 0..schema.field_schema.len() { // Extract the field name from the row and find a matching expected field. 
let actual_field_name = actual_fields .next() @@ -1682,7 +1616,7 @@ pub(crate) fn parse_field_select_row( .as_str() .expect("Expected a string field name") .to_string(); - let expected_field = schema.field_schema(&name).expect( + let expected_field = schema.schema_for_field(&name).expect( "Found field with name that is not part of the timeseries schema", ); @@ -1690,7 +1624,7 @@ pub(crate) fn parse_field_select_row( let actual_field_value = actual_fields .next() .expect("Missing a field value from a field select query"); - let value = match expected_field.ty { + let value = match expected_field.field_type { FieldType::Bool => { FieldValue::Bool(bool::from(DbBool::from( actual_field_value @@ -1797,7 +1731,6 @@ pub(crate) fn parse_field_select_row( FieldSource::Target => target_fields.push(field), FieldSource::Metric => metric_fields.push(field), } - n_fields += 1; } ( row.timeseries_key, @@ -1874,12 +1807,12 @@ mod tests { let list: BTreeSet<_> = [ FieldSchema { name: String::from("field0"), - ty: FieldType::I64, + field_type: FieldType::I64, source: FieldSource::Target, }, FieldSchema { name: String::from("field1"), - ty: FieldType::IpAddr, + field_type: FieldType::IpAddr, source: FieldSource::Metric, }, ] diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index 6a55d3f518..2caefb24c3 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -101,7 +101,7 @@ impl SelectQueryBuilder { let field_name = field_name.as_ref().to_string(); let field_schema = self .timeseries_schema - .field_schema(&field_name) + .schema_for_field(&field_name) .ok_or_else(|| Error::NoSuchField { timeseries_name: self .timeseries_schema @@ -110,7 +110,7 @@ impl SelectQueryBuilder { field_name: field_name.clone(), })?; let field_value: FieldValue = field_value.into(); - let expected_type = field_schema.ty; + let expected_type = field_schema.field_type; let found_type = field_value.field_type(); if expected_type != found_type { return Err(Error::IncorrectFieldType { @@ -150,7 +150,7 @@ impl SelectQueryBuilder { ) -> Result { let field_schema = self .timeseries_schema - .field_schema(&selector.name) + .schema_for_field(&selector.name) .ok_or_else(|| Error::NoSuchField { timeseries_name: self .timeseries_schema @@ -158,13 +158,14 @@ impl SelectQueryBuilder { .to_string(), field_name: selector.name.clone(), })?; - if !selector.op.valid_for_type(field_schema.ty) { + let field_type = field_schema.field_type; + if !selector.op.valid_for_type(field_type) { return Err(Error::InvalidFieldCmp { op: format!("{:?}", selector.op), - ty: field_schema.ty, + ty: field_schema.field_type, }); } - let field_value = match field_schema.ty { + let field_value = match field_type { FieldType::String => FieldValue::from(&selector.value), FieldType::I8 => parse_selector_field_value::( &field_schema, @@ -214,9 +215,9 @@ impl SelectQueryBuilder { let comparison = FieldComparison { op: selector.op, value: field_value }; let selector = FieldSelector { - name: field_schema.name.clone(), + name: field_schema.name.to_string(), comparison: Some(comparison), - ty: field_schema.ty, + ty: field_type, }; self.field_selectors.insert(field_schema.clone(), selector); Ok(self) @@ -248,7 +249,7 @@ impl SelectQueryBuilder { T: Target, M: Metric, { - let schema = crate::model::schema_for_parts(target, metric); + let schema = TimeseriesSchema::new(target, metric); let mut builder = Self::new(&schema); let target_fields = target.field_names().iter().zip(target.field_values()); @@ -279,9 +280,9 @@ impl SelectQueryBuilder { for field in 
timeseries_schema.field_schema.iter() { let key = field.clone(); field_selectors.entry(key).or_insert_with(|| FieldSelector { - name: field.name.clone(), + name: field.name.to_string(), comparison: None, - ty: field.ty, + ty: field.field_type, }); } SelectQuery { @@ -309,8 +310,8 @@ where { Ok(FieldValue::from(s.parse::().map_err(|_| { Error::InvalidFieldValue { - field_name: field.name.clone(), - field_type: field.ty, + field_name: field.name.to_string(), + field_type: field.field_type, value: s.to_string(), } })?)) @@ -778,12 +779,12 @@ mod tests { field_schema: [ FieldSchema { name: "f0".to_string(), - ty: FieldType::I64, + field_type: FieldType::I64, source: FieldSource::Target, }, FieldSchema { name: "f1".to_string(), - ty: FieldType::Bool, + field_type: FieldType::Bool, source: FieldSource::Target, }, ] @@ -981,6 +982,7 @@ mod tests { "Expected an exact comparison when building a query from parts", ); + println!("{builder:#?}"); assert_eq!( builder.field_selector(FieldSource::Metric, "baz").unwrap(), &FieldSelector { @@ -1002,12 +1004,12 @@ mod tests { field_schema: [ FieldSchema { name: "f0".to_string(), - ty: FieldType::I64, + field_type: FieldType::I64, source: FieldSource::Target, }, FieldSchema { name: "f1".to_string(), - ty: FieldType::Bool, + field_type: FieldType::Bool, source: FieldSource::Target, }, ] @@ -1065,12 +1067,12 @@ mod tests { field_schema: [ FieldSchema { name: "f0".to_string(), - ty: FieldType::I64, + field_type: FieldType::I64, source: FieldSource::Target, }, FieldSchema { name: "f1".to_string(), - ty: FieldType::Bool, + field_type: FieldType::Bool, source: FieldSource::Target, }, ] @@ -1116,12 +1118,12 @@ mod tests { field_schema: [ FieldSchema { name: "f0".to_string(), - ty: FieldType::I64, + field_type: FieldType::I64, source: FieldSource::Target, }, FieldSchema { name: "f1".to_string(), - ty: FieldType::Bool, + field_type: FieldType::Bool, source: FieldSource::Target, }, ] diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index 0cb2d8cace..b545c697de 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -11,8 +11,10 @@ chrono.workspace = true num.workspace = true omicron-common.workspace = true oximeter-macro-impl.workspace = true +regex.workspace = true schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true +serde_json.workspace = true strum.workspace = true thiserror.workspace = true uuid.workspace = true diff --git a/oximeter/oximeter/src/lib.rs b/oximeter/oximeter/src/lib.rs index 2ced404eae..1855762abe 100644 --- a/oximeter/oximeter/src/lib.rs +++ b/oximeter/oximeter/src/lib.rs @@ -108,10 +108,14 @@ pub use oximeter_macro_impl::*; extern crate self as oximeter; pub mod histogram; +pub mod schema; pub mod test_util; pub mod traits; pub mod types; +pub use schema::FieldSchema; +pub use schema::TimeseriesName; +pub use schema::TimeseriesSchema; pub use traits::Metric; pub use traits::Producer; pub use traits::Target; diff --git a/oximeter/oximeter/src/schema.rs b/oximeter/oximeter/src/schema.rs new file mode 100644 index 0000000000..b6953fda52 --- /dev/null +++ b/oximeter/oximeter/src/schema.rs @@ -0,0 +1,640 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2023 Oxide Computer Company + +//! Tools for working with schema for fields and timeseries. 
+
+use crate::types::DatumType;
+use crate::types::FieldType;
+use crate::types::MetricsError;
+use crate::types::Sample;
+use crate::Metric;
+use crate::Target;
+use chrono::DateTime;
+use chrono::Utc;
+use schemars::JsonSchema;
+use serde::Deserialize;
+use serde::Serialize;
+use std::collections::btree_map::Entry;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+use std::fmt::Write;
+use std::path::Path;
+
+/// The name and type information for a field of a timeseries schema.
+#[derive(
+    Clone,
+    Debug,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Ord,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+)]
+pub struct FieldSchema {
+    pub name: String,
+    pub field_type: FieldType,
+    pub source: FieldSource,
+}
+
+/// The source from which a field is derived, the target or metric.
+#[derive(
+    Clone,
+    Copy,
+    Debug,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Ord,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+)]
+#[serde(rename_all = "snake_case")]
+pub enum FieldSource {
+    Target,
+    Metric,
+}
+
+/// A timeseries name.
+///
+/// Timeseries are named by concatenating the names of their target and metric, joined with a
+/// colon.
+#[derive(
+    Debug, Clone, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize,
+)]
+#[serde(try_from = "&str")]
+pub struct TimeseriesName(String);
+
+impl JsonSchema for TimeseriesName {
+    fn schema_name() -> String {
+        "TimeseriesName".to_string()
+    }
+
+    fn json_schema(
+        _: &mut schemars::gen::SchemaGenerator,
+    ) -> schemars::schema::Schema {
+        schemars::schema::SchemaObject {
+            metadata: Some(Box::new(schemars::schema::Metadata {
+                title: Some("The name of a timeseries".to_string()),
+                description: Some(
+                    "Names are constructed by concatenating the target \
+                    and metric names with ':'. Target and metric \
+                    names must be lowercase alphanumeric characters \
+                    with '_' separating words."
+                        .to_string(),
+                ),
+                ..Default::default()
+            })),
+            instance_type: Some(schemars::schema::InstanceType::String.into()),
+            string: Some(Box::new(schemars::schema::StringValidation {
+                pattern: Some(TIMESERIES_NAME_REGEX.to_string()),
+                ..Default::default()
+            })),
+            ..Default::default()
+        }
+        .into()
+    }
+}
+
+impl std::ops::Deref for TimeseriesName {
+    type Target = String;
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for TimeseriesName {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl std::convert::TryFrom<&str> for TimeseriesName {
+    type Error = MetricsError;
+    fn try_from(s: &str) -> Result<Self, Self::Error> {
+        validate_timeseries_name(s).map(|s| TimeseriesName(s.to_string()))
+    }
+}
+
+impl std::convert::TryFrom<String> for TimeseriesName {
+    type Error = MetricsError;
+    fn try_from(s: String) -> Result<Self, Self::Error> {
+        validate_timeseries_name(&s)?;
+        Ok(TimeseriesName(s))
+    }
+}
+
+impl std::str::FromStr for TimeseriesName {
+    type Err = MetricsError;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        s.try_into()
+    }
+}
+
+impl<T> PartialEq<T> for TimeseriesName
+where
+    T: AsRef<str>,
+{
+    fn eq(&self, other: &T) -> bool {
+        self.0.eq(other.as_ref())
+    }
+}
+
+fn validate_timeseries_name(s: &str) -> Result<&str, MetricsError> {
+    if regex::Regex::new(TIMESERIES_NAME_REGEX).unwrap().is_match(s) {
+        Ok(s)
+    } else {
+        Err(MetricsError::InvalidTimeseriesName)
+    }
+}
+
+/// The schema for a timeseries.
+///
+/// This includes the name of the timeseries, as well as the datum type of its metric and the
+/// schema for each field.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct TimeseriesSchema {
+    pub timeseries_name: TimeseriesName,
+    pub field_schema: BTreeSet<FieldSchema>,
+    pub datum_type: DatumType,
+    pub created: DateTime<Utc>,
+}
+
+impl From<&Sample> for TimeseriesSchema {
+    fn from(sample: &Sample) -> Self {
+        let timeseries_name = sample.timeseries_name.parse().unwrap();
+        let mut field_schema = BTreeSet::new();
+        for field in sample.target_fields() {
+            let schema = FieldSchema {
+                name: field.name.clone(),
+                field_type: field.value.field_type(),
+                source: FieldSource::Target,
+            };
+            field_schema.insert(schema);
+        }
+        for field in sample.metric_fields() {
+            let schema = FieldSchema {
+                name: field.name.clone(),
+                field_type: field.value.field_type(),
+                source: FieldSource::Metric,
+            };
+            field_schema.insert(schema);
+        }
+        let datum_type = sample.measurement.datum_type();
+        Self { timeseries_name, field_schema, datum_type, created: Utc::now() }
+    }
+}
+
+impl TimeseriesSchema {
+    /// Construct a timeseries schema from a target and metric.
+    pub fn new<T, M>(target: &T, metric: &M) -> Self
+    where
+        T: Target,
+        M: Metric,
+    {
+        let timeseries_name =
+            TimeseriesName::try_from(crate::timeseries_name(target, metric))
+                .unwrap();
+        let mut field_schema = BTreeSet::new();
+        for field in target.fields() {
+            let schema = FieldSchema {
+                name: field.name.clone(),
+                field_type: field.value.field_type(),
+                source: FieldSource::Target,
+            };
+            field_schema.insert(schema);
+        }
+        for field in metric.fields() {
+            let schema = FieldSchema {
+                name: field.name.clone(),
+                field_type: field.value.field_type(),
+                source: FieldSource::Metric,
+            };
+            field_schema.insert(schema);
+        }
+        let datum_type = metric.datum_type();
+        Self { timeseries_name, field_schema, datum_type, created: Utc::now() }
+    }
+
+    /// Construct a timeseries schema from a sample
+    pub fn from_sample(sample: &Sample) -> Self {
+        Self::from(sample)
+    }
+
+    /// Return the schema for the given field.
+    pub fn schema_for_field<S>(&self, name: S) -> Option<&FieldSchema>
+    where
+        S: AsRef<str>,
+    {
+        self.field_schema.iter().find(|field| field.name == name.as_ref())
+    }
+
+    /// Return the target and metric component names for this timeseries
+    pub fn component_names(&self) -> (&str, &str) {
+        self.timeseries_name
+            .split_once(':')
+            .expect("Incorrectly formatted timeseries name")
+    }
+}
+
+impl PartialEq for TimeseriesSchema {
+    fn eq(&self, other: &TimeseriesSchema) -> bool {
+        self.timeseries_name == other.timeseries_name
+            && self.datum_type == other.datum_type
+            && self.field_schema == other.field_schema
+    }
+}
+
+// Regular expression describing valid timeseries names.
+//
+// Names are derived from the names of the Rust structs for the target and metric, converted to
+// snake case. So the names must be valid identifiers, and generally:
+//
+// - Start with lowercase a-z
+// - Any number of alphanumerics
+// - Zero or more of the above, delimited by '_'.
+//
+// That describes the target/metric name, and the timeseries is two of those, joined with ':'.
+const TIMESERIES_NAME_REGEX: &str =
+    "(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*):(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*)";
+
+/// A set of timeseries schema, useful for testing changes to targets or
+/// metrics.
+#[derive(Debug, Default, Deserialize, PartialEq, Serialize)]
+pub struct SchemaSet {
+    #[serde(flatten)]
+    inner: BTreeMap<TimeseriesName, TimeseriesSchema>,
+}
+
+impl SchemaSet {
+    /// Insert a timeseries schema, checking for conflicts.
+    ///
+    /// This inserts the schema derived from `target` and `metric`. If one
+    /// does _not_ already exist in `self` or a _matching_ one exists, `None`
+    /// is returned.
+    ///
+    /// If the derived schema _conflicts_ with one in `self`, the existing
+    /// schema is returned.
+    pub fn insert_checked<T, M>(
+        &mut self,
+        target: &T,
+        metric: &M,
+    ) -> Option<TimeseriesSchema>
+    where
+        T: Target,
+        M: Metric,
+    {
+        let new = TimeseriesSchema::new(target, metric);
+        let name = new.timeseries_name.clone();
+        match self.inner.entry(name) {
+            Entry::Vacant(entry) => {
+                entry.insert(new);
+                None
+            }
+            Entry::Occupied(entry) => {
+                let existing = entry.get();
+                if existing == &new {
+                    None
+                } else {
+                    Some(existing.clone())
+                }
+            }
+        }
+    }
+
+    /// Compare the set of schema against the contents of a file.
+    ///
+    /// This function loads a `SchemaSet` from the provided JSON file, and
+    /// asserts that the contained schema matches those in `self`. Note that
+    /// equality of `TimeseriesSchema` ignores creation timestamps, so this
+    /// compares the "identity" data: timeseries name, field names, field types,
+    /// and field sources.
+    ///
+    /// This is intentionally similar to `expectorate::assert_contents()`. If
+    /// the provided file doesn't exist, it's treated as empty. If it does, a
+    /// `SchemaSet` is deserialized from it and a comparison between that and
+    /// `self` is done.
+    ///
+    /// You can use `EXPECTORATE=overwrite` to overwrite the existing file,
+    /// rather than panicking.
+    pub fn assert_contents(&self, path: impl AsRef<Path>) {
+        let path = path.as_ref();
+        let v = std::env::var_os("EXPECTORATE");
+        let overwrite =
+            v.as_deref().and_then(std::ffi::OsStr::to_str) == Some("overwrite");
+        let expected_contents = serde_json::to_string_pretty(self).unwrap();
+        if overwrite {
+            if let Err(e) = std::fs::write(path, &expected_contents) {
+                panic!(
+                    "Failed to write contents to '{}': {}",
+                    path.display(),
+                    e
+                );
+            }
+        } else {
+            // If the file doesn't exist, it's just empty and we'll create an
+            // empty set of schema.
+            let contents = if !path.exists() {
+                String::from("{}")
+            } else {
+                match std::fs::read_to_string(path) {
+                    Err(e) => {
+                        panic!("Failed to read '{}': {}", path.display(), e)
+                    }
+                    Ok(c) => c,
+                }
+            };
+            let other: Self = serde_json::from_str(&contents).unwrap();
+            if self == &other {
+                return;
+            }
+
+            let mut diffs = String::new();
+            writeln!(
+                &mut diffs,
+                "Timeseries schema in \"{}\" do not match\n",
+                path.display()
+            )
+            .unwrap();
+
+            // Print schema in self that are not in the file, or mismatched
+            // schema.
+            for (name, schema) in self.inner.iter() {
+                let Some(other_schema) = other.inner.get(name) else {
+                    writeln!(
+                        &mut diffs,
+                        "File is missing timeseries \"{}\"",
+                        name
+                    )
+                    .unwrap();
+                    continue;
+                };
+                if schema == other_schema {
+                    continue;
+                }
+                writeln!(&mut diffs, "Timeseries \"{name}\" differs").unwrap();
+
+                // Print out any differences in the datum type.
+ if schema.datum_type != other_schema.datum_type { + writeln!( + &mut diffs, + " Expected datum type: {}", + schema.datum_type + ) + .unwrap(); + writeln!( + &mut diffs, + " Actual datum type: {}", + other_schema.datum_type + ) + .unwrap(); + } + + // Print fields in self that are not in other, or are mismatched + for field in schema.field_schema.iter() { + let Some(other_field) = + other_schema.field_schema.get(field) + else { + writeln!( + &mut diffs, + " File is missing {:?} field \"{}\"", + field.source, field.name, + ) + .unwrap(); + continue; + }; + if field == other_field { + continue; + } + + writeln!( + &mut diffs, + " File has mismatched field \"{}\"", + field.name + ) + .unwrap(); + writeln!( + &mut diffs, + " Expected type: {}", + field.field_type + ) + .unwrap(); + writeln!( + &mut diffs, + " Actual type: {}", + other_field.field_type + ) + .unwrap(); + writeln!( + &mut diffs, + " Expected source: {:?}", + field.source + ) + .unwrap(); + writeln!( + &mut diffs, + " Actual source: {:?}", + other_field.source + ) + .unwrap(); + } + + // Print fields in other that are not in self, fields that are + // in both but don't match are taken care of in the above loop. + for other_field in other_schema.field_schema.iter() { + if schema.field_schema.contains(other_field) { + continue; + } + + writeln!( + &mut diffs, + " Current set is missing {:?} field \"{}\"", + other_field.source, other_field.name, + ) + .unwrap(); + } + } + + // Print schema that are in the file, but not self. Those that don't + // match are handled in the above block. + for key in other.inner.keys() { + if !self.inner.contains_key(key) { + writeln!( + &mut diffs, + " Current set is missing timeseries \"{}\"", + key + ) + .unwrap(); + } + } + panic!("{}", diffs); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::convert::TryFrom; + use uuid::Uuid; + + #[test] + fn test_timeseries_name() { + let name = TimeseriesName::try_from("foo:bar").unwrap(); + assert_eq!(format!("{}", name), "foo:bar"); + } + + #[test] + fn test_timeseries_name_from_str() { + assert!(TimeseriesName::try_from("a:b").is_ok()); + assert!(TimeseriesName::try_from("a_a:b_b").is_ok()); + assert!(TimeseriesName::try_from("a0:b0").is_ok()); + assert!(TimeseriesName::try_from("a_0:b_0").is_ok()); + + assert!(TimeseriesName::try_from("_:b").is_err()); + assert!(TimeseriesName::try_from("a_:b").is_err()); + assert!(TimeseriesName::try_from("0:b").is_err()); + assert!(TimeseriesName::try_from(":b").is_err()); + assert!(TimeseriesName::try_from("a:").is_err()); + assert!(TimeseriesName::try_from("123").is_err()); + } + + #[derive(Target)] + struct MyTarget { + id: Uuid, + name: String, + } + + const ID: Uuid = uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); + + impl Default for MyTarget { + fn default() -> Self { + Self { id: ID, name: String::from("name") } + } + } + + #[derive(Metric)] + struct MyMetric { + happy: bool, + datum: u64, + } + + impl Default for MyMetric { + fn default() -> Self { + Self { happy: true, datum: 0 } + } + } + + #[test] + fn test_timeseries_schema_from_parts() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let schema = TimeseriesSchema::new(&target, &metric); + + assert_eq!(schema.timeseries_name, "my_target:my_metric"); + let f = schema.schema_for_field("id").unwrap(); + assert_eq!(f.name, "id"); + assert_eq!(f.field_type, FieldType::Uuid); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("name").unwrap(); + assert_eq!(f.name, "name"); + 
assert_eq!(f.field_type, FieldType::String); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("happy").unwrap(); + assert_eq!(f.name, "happy"); + assert_eq!(f.field_type, FieldType::Bool); + assert_eq!(f.source, FieldSource::Metric); + assert_eq!(schema.datum_type, DatumType::U64); + } + + #[test] + fn test_timeseries_schema_from_sample() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let sample = Sample::new(&target, &metric).unwrap(); + let schema = TimeseriesSchema::new(&target, &metric); + let schema_from_sample = TimeseriesSchema::from(&sample); + assert_eq!(schema, schema_from_sample); + } + + // Test that we correctly order field across a target and metric. + // + // In an earlier commit, we switched from storing fields in an unordered Vec + // to using a BTree{Map,Set} to ensure ordering by name. However, the + // `TimeseriesSchema` type stored all its fields by chaining the sorted + // fields from the target and metric, without then sorting _across_ them. + // + // This was exacerbated by the error reporting, where we did in fact sort + // all fields across the target and metric, making it difficult to tell how + // the derived schema was different, if at all. + // + // This test generates a sample with a schema where the target and metric + // fields are sorted within them, but not across them. We check that the + // derived schema are actually equal, which means we've imposed that + // ordering when deriving the schema. + #[test] + fn test_schema_field_ordering_across_target_metric() { + let target_field = FieldSchema { + name: String::from("later"), + field_type: FieldType::U64, + source: FieldSource::Target, + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + field_type: FieldType::U64, + source: FieldSource::Metric, + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name, + field_schema, + datum_type, + created: Utc::now(), + }; + + #[derive(oximeter::Target)] + struct Foo { + later: u64, + } + #[derive(oximeter::Metric)] + struct Bar { + earlier: u64, + datum: u64, + } + + let target = Foo { later: 1 }; + let metric = Bar { earlier: 2, datum: 10 }; + let sample = Sample::new(&target, &metric).unwrap(); + let derived_schema = TimeseriesSchema::from(&sample); + assert_eq!(derived_schema, expected_schema); + } + + #[test] + fn test_field_schema_ordering() { + let mut fields = BTreeSet::new(); + fields.insert(FieldSchema { + name: String::from("second"), + field_type: FieldType::U64, + source: FieldSource::Target, + }); + fields.insert(FieldSchema { + name: String::from("first"), + field_type: FieldType::U64, + source: FieldSource::Target, + }); + let mut iter = fields.iter(); + assert_eq!(iter.next().unwrap().name, "first"); + assert_eq!(iter.next().unwrap().name, "second"); + assert!(iter.next().is_none()); + } +} diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index 23dbe2be6b..3d74bec72c 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -629,6 +629,8 @@ pub enum MetricsError { #[error("Missing datum of type {datum_type} cannot have a start time")] MissingDatumCannotHaveStartTime { datum_type: DatumType }, + #[error("Invalid timeseries name")] + InvalidTimeseriesName, } impl From for omicron_common::api::external::Error { diff 
--git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 3aff947fd3..1d14b26a69 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -89,7 +89,7 @@ semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.2.1", features = ["inline", "unicode"] } +similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } spin = { version = "0.9.8" } @@ -190,7 +190,7 @@ semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.2.1", features = ["inline", "unicode"] } +similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } spin = { version = "0.9.8" } From baf7347b6b6b4416930b6f4eaba0d45db1694982 Mon Sep 17 00:00:00 2001 From: Justin Bennett Date: Mon, 11 Dec 2023 13:58:57 -0500 Subject: [PATCH 084/186] Explicitly use the alloc op context for dpd nat updates (#4654) Passes the alloc opcontext to the boundary switches call to avoid a permissions issue when deleting nat entries while deleting an instance. --------- Co-authored-by: James MacMahon --- nexus/src/app/instance_network.rs | 5 +++-- nexus/tests/integration_tests/instances.rs | 24 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index abb8c744e1..3db749f43b 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -451,8 +451,6 @@ impl super::Nexus { .instance_lookup_external_ips(opctx, instance_id) .await?; - let boundary_switches = self.boundary_switches(opctx).await?; - let mut errors = vec![]; for entry in external_ips { // Soft delete the NAT entry @@ -478,6 +476,9 @@ impl super::Nexus { }?; } + let boundary_switches = + self.boundary_switches(&self.opctx_alloc).await?; + for switch in &boundary_switches { debug!(&self.log, "notifying dendrite of updates"; "instance_id" => %authz_instance.id(), diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 9260006c81..19b507f5bb 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -3911,6 +3911,30 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { instance_simulate_with_opctx(nexus, &instance.identity.id, &opctx).await; let instance = instance_get_as(&client, &instance_url, authn).await; assert_eq!(instance.runtime.run_state, InstanceState::Running); + + // Stop the instance + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + &format!("/v1/instances/{}/stop", instance.identity.id), + ) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::SiloUser(user_id)) + .execute() + .await + .expect("Failed to stop the instance"); + + 
instance_simulate_with_opctx(nexus, &instance.identity.id, &opctx).await; + + // Delete the instance + NexusRequest::object_delete(client, &instance_url) + .authn_as(AuthnMode::SiloUser(user_id)) + .execute() + .await + .expect("Failed to delete the instance"); } /// Test that appropriate OPTE V2P mappings are created and deleted. From 23f149aaaf81c628c88b9d5b2550ca24edc05272 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Mon, 11 Dec 2023 13:55:35 -0800 Subject: [PATCH 085/186] Add simulated crucible pantry checks (#4600) --- Cargo.lock | 113 +++++++++++++++--- Cargo.toml | 1 + sled-agent/Cargo.toml | 1 + sled-agent/src/sim/http_entrypoints_pantry.rs | 98 +++++++++++++++ workspace-hack/Cargo.toml | 12 +- 5 files changed, 205 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c379dcfbff..dfab530a28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,13 +54,15 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", + "getrandom 0.2.10", "once_cell", "version_check", + "zerocopy 0.7.26", ] [[package]] @@ -780,9 +782,9 @@ dependencies = [ [[package]] name = "cargo_metadata" -version = "0.18.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb9ac64500cc83ce4b9f8dafa78186aa008c8dea77a09b94cd307fd0cd5022a8" +checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037" dependencies = [ "camino", "cargo-platform", @@ -841,6 +843,16 @@ dependencies = [ "nom", ] +[[package]] +name = "cfg-expr" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03915af431787e6ffdcc74c645077518c6b6e01f80b761e0fbbfa288536311b3" +dependencies = [ + "smallvec 1.11.2", + "target-lexicon", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -2658,6 +2670,39 @@ dependencies = [ "subtle", ] +[[package]] +name = "guppy" +version = "0.17.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "114a100a9aa9f4c468a7b9e96626cdab267bb652660d8408e8f6d56d4c310edd" +dependencies = [ + "ahash", + "camino", + "cargo_metadata", + "cfg-if", + "debug-ignore", + "fixedbitset", + "guppy-workspace-hack", + "indexmap 2.1.0", + "itertools 0.12.0", + "nested", + "once_cell", + "pathdiff", + "petgraph", + "semver 1.0.20", + "serde", + "serde_json", + "smallvec 1.11.2", + "static_assertions", + "target-spec", +] + +[[package]] +name = "guppy-workspace-hack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92620684d99f750bae383ecb3be3748142d6095760afd5cbcf2261e9a279d780" + [[package]] name = "h2" version = "0.3.21" @@ -3962,6 +4007,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nested" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b420f638f07fe83056b55ea190bb815f609ec5a35e7017884a10f78839c9e" + [[package]] name = "new_debug_unreachable" version = "1.0.4" @@ -4235,7 +4286,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" dependencies = [ - "smallvec 1.11.0", + "smallvec 1.11.2", ] [[package]] @@ -4321,7 +4372,7 @@ dependencies = [ "num-traits", "rand 0.8.5", "serde", - "smallvec 1.11.0", + 
"smallvec 1.11.2", "zeroize", ] @@ -4868,6 +4919,7 @@ dependencies = [ "futures", "gateway-client", "glob", + "guppy", "hex", "http", "hyper", @@ -4965,6 +5017,7 @@ dependencies = [ name = "omicron-workspace-hack" version = "0.1.0" dependencies = [ + "ahash", "anyhow", "base16ct", "bit-set", @@ -5070,6 +5123,7 @@ dependencies = [ "usdt", "uuid", "yasna", + "zerocopy 0.7.26", "zeroize", "zip", ] @@ -5540,7 +5594,7 @@ dependencies = [ "instant", "libc", "redox_syscall 0.2.16", - "smallvec 1.11.0", + "smallvec 1.11.2", "winapi", ] @@ -5553,7 +5607,7 @@ dependencies = [ "cfg-if", "libc", "redox_syscall 0.3.5", - "smallvec 1.11.0", + "smallvec 1.11.2", "windows-targets 0.48.5", ] @@ -5629,6 +5683,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "498a099351efa4becc6a19c72aa9270598e8fd274ca47052e37455241c88b696" +[[package]] +name = "pathdiff" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +dependencies = [ + "camino", +] + [[package]] name = "pbkdf2" version = "0.11.0" @@ -7231,9 +7294,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.192" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] @@ -7269,9 +7332,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.192" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", @@ -7766,9 +7829,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.0" +version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" [[package]] name = "smawk" @@ -8226,6 +8289,24 @@ dependencies = [ "xattr", ] +[[package]] +name = "target-lexicon" +version = "0.12.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" + +[[package]] +name = "target-spec" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48b81540ee78bd9de9f7dca2378f264cf1f4193da6e2d09b54c0d595131a48f1" +dependencies = [ + "cfg-expr", + "guppy-workspace-hack", + "target-lexicon", + "unicode-ident", +] + [[package]] name = "tempdir" version = "0.3.7" @@ -8813,7 +8894,7 @@ dependencies = [ "ipnet", "lazy_static", "rand 0.8.5", - "smallvec 1.11.0", + "smallvec 1.11.2", "thiserror", "tinyvec", "tokio", @@ -8834,7 +8915,7 @@ dependencies = [ "lru-cache", "parking_lot 0.12.1", "resolv-conf", - "smallvec 1.11.0", + "smallvec 1.11.2", "thiserror", "tokio", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 5591dcebc9..7ea34b795f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -202,6 +202,7 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- gateway-sp-comms = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9" } gateway-test-utils = { path = 
"gateway-test-utils" } glob = "0.3.1" +guppy = "0.17.4" headers = "0.3.9" heck = "0.4" hex = "0.4.3" diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 3f7fd1c7f2..b734248f32 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -85,6 +85,7 @@ opte-ioctl.workspace = true [dev-dependencies] assert_matches.workspace = true expectorate.workspace = true +guppy.workspace = true http.workspace = true hyper.workspace = true omicron-test-utils.workspace = true diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index 8430dc0731..8f572b46a0 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -280,3 +280,101 @@ async fn detach( Ok(HttpResponseDeleted()) } + +#[cfg(test)] +mod tests { + use guppy::graph::ExternalSource; + use guppy::graph::GitReq; + use guppy::graph::PackageGraph; + use guppy::MetadataCommand; + use serde_json::Value; + use std::path::Path; + + fn load_real_api_as_json() -> serde_json::Value { + let manifest_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .join("Cargo.toml"); + let mut cmd = MetadataCommand::new(); + cmd.manifest_path(&manifest_path); + let graph = PackageGraph::from_command(&mut cmd).unwrap(); + let package = graph + .packages() + .find(|pkg| pkg.name() == "crucible-pantry-client") + .unwrap(); + let ExternalSource::Git { req, .. } = + package.source().parse_external().unwrap() + else { + panic!("This should be a Git dependency"); + }; + let part = match req { + GitReq::Branch(inner) => inner, + GitReq::Rev(inner) => inner, + GitReq::Tag(inner) => inner, + GitReq::Default => "main", + _ => unreachable!(), + }; + let raw_url = format!( + "https://raw.githubusercontent.com/oxidecomputer/crucible/{part}/openapi/crucible-pantry.json", + ); + let raw_json = + reqwest::blocking::get(&raw_url).unwrap().text().unwrap(); + serde_json::from_str(&raw_json).unwrap() + } + + // Regression test for https://github.com/oxidecomputer/omicron/issues/4599. + #[test] + fn test_simulated_api_matches_real() { + let real_api = load_real_api_as_json(); + let Value::String(ref title) = real_api["info"]["title"] else { + unreachable!(); + }; + let Value::String(ref version) = real_api["info"]["version"] else { + unreachable!(); + }; + let sim_api = super::api().openapi(title, version).json().unwrap(); + + // We'll assert that anything which apppears in the simulated API must + // appear exactly as-is in the real API. I.e., the simulated is a subset + // (possibly non-strict) of the real API. 
+ compare_json_values(&sim_api, &real_api, String::new()); + } + + fn compare_json_values(lhs: &Value, rhs: &Value, path: String) { + match lhs { + Value::Array(values) => { + let Value::Array(rhs_values) = &rhs else { + panic!( + "Expected an array in the real API JSON at \ + path \"{path}\", found {rhs:?}", + ); + }; + assert_eq!(values.len(), rhs_values.len()); + for (i, (left, right)) in + values.iter().zip(rhs_values.iter()).enumerate() + { + let new_path = format!("{path}[{i}]"); + compare_json_values(left, right, new_path); + } + } + Value::Object(map) => { + let Value::Object(rhs_map) = &rhs else { + panic!( + "Expected a map in the real API JSON at \ + path \"{path}\", found {rhs:?}", + ); + }; + for (key, value) in map.iter() { + let new_path = format!("{path}/{key}"); + let rhs_value = rhs_map.get(key).unwrap_or_else(|| { + panic!("Real API JSON missing key: \"{new_path}\"") + }); + compare_json_values(value, rhs_value, new_path); + } + } + _ => { + assert_eq!(lhs, rhs, "Mismatched keys at JSON path \"{path}\"") + } + } + } +} diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1d14b26a69..f462fd5b6d 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -14,6 +14,7 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] +ahash = { version = "0.8.6" } anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -86,8 +87,8 @@ reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", " ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } -serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.108", features = ["raw_value"] } +serde = { version = "1.0.193", features = ["alloc", "derive", "rc"] } +serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -111,10 +112,12 @@ unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } uuid = { version = "1.6.1", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } +zerocopy = { version = "0.7.26", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } [build-dependencies] +ahash = { version = "0.8.6" } anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -187,8 +190,8 @@ reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", " ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } -serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.108", features = ["raw_value"] } +serde = { version = "1.0.193", features = ["alloc", "derive", "rc"] } +serde_json = { version = "1.0.108", features = ["raw_value", 
"unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -213,6 +216,7 @@ unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } uuid = { version = "1.6.1", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } +zerocopy = { version = "0.7.26", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } From c9bb9c13ca7f411c4f78e653c1dd472b5c8de095 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Mon, 11 Dec 2023 16:08:05 -0800 Subject: [PATCH 086/186] inventory builder could better distinguish runtime errors from API misuse (#4672) --- Cargo.lock | 1 + nexus/inventory/Cargo.toml | 1 + nexus/inventory/src/builder.rs | 73 ++++++++++++++++++++++---------- nexus/inventory/src/collector.rs | 11 +++-- nexus/inventory/src/lib.rs | 2 + 5 files changed, 62 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dfab530a28..1a032c9680 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4193,6 +4193,7 @@ dependencies = [ "regex", "slog", "strum", + "thiserror", "tokio", "uuid", ] diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 6bb63cf9f7..22b48ebcec 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -13,6 +13,7 @@ gateway-messages.workspace = true nexus-types.workspace = true slog.workspace = true strum.workspace = true +thiserror.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 188a48b553..2d8ba0d1f9 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -27,8 +27,40 @@ use nexus_types::inventory::ServiceProcessor; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::sync::Arc; +use thiserror::Error; use uuid::Uuid; +/// Describes an operational error encountered during the collection process +/// +/// Examples include a down MGS instance, failure to parse a response from some +/// other service, etc. We currently don't need to distinguish these +/// programmatically. +#[derive(Debug, Error)] +pub struct InventoryError(#[from] anyhow::Error); + +impl std::fmt::Display for InventoryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:#}", self.0) + } +} + +/// Describes a mis-use of the [`CollectionBuilder`] object +/// +/// Example: reporting information about a caboose when the caller has not +/// already reported information about the corresopnding baseboard. +/// +/// Unlike `InventoryError`s, which can always happen in a real system, these +/// errors are not ever expected. Ideally, all of these problems would be +/// compile errors. 
+#[derive(Debug, Error)]
+pub struct CollectorBug(#[from] anyhow::Error);
+
+impl std::fmt::Display for CollectorBug {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:#}", self.0)
+    }
+}
+
 /// Build an inventory [`Collection`]
 ///
 /// This interface is oriented around the interfaces used by an actual
@@ -37,7 +69,7 @@ use uuid::Uuid;
 #[derive(Debug)]
 pub struct CollectionBuilder {
     // For field documentation, see the corresponding fields in `Collection`.
-    errors: Vec<anyhow::Error>,
+    errors: Vec<InventoryError>,
     time_started: DateTime<Utc>,
     collector: String,
     baseboards: BTreeSet<Arc<BaseboardId>>,
@@ -76,11 +108,7 @@ impl CollectionBuilder {
     pub fn build(self) -> Collection {
         Collection {
             id: Uuid::new_v4(),
-            errors: self
-                .errors
-                .into_iter()
-                .map(|e| format!("{:#}", e))
-                .collect(),
+            errors: self.errors.into_iter().map(|e| e.to_string()).collect(),
             time_started: self.time_started,
             time_done: now(),
             collector: self.collector,
@@ -115,12 +143,12 @@ impl CollectionBuilder {
         // can stick it into a u16 (which still seems generous). This will
         // allow us to store it into an Int32 in the database.
         let Ok(sp_slot) = u16::try_from(slot) else {
-            self.found_error(anyhow!(
+            self.found_error(InventoryError::from(anyhow!(
                 "MGS {:?}: SP {:?} slot {}: slot number did not fit into u16",
                 source,
                 sp_type,
                 slot
-            ));
+            )));
             return None;
         };
@@ -177,12 +205,12 @@ impl CollectionBuilder {
             gateway_client::types::RotState::CommunicationFailed {
                 message,
             } => {
-                self.found_error(anyhow!(
+                self.found_error(InventoryError::from(anyhow!(
                     "MGS {:?}: reading RoT state for {:?}: {}",
                     source,
                     baseboard,
                     message
-                ));
+                )));
             }
         }
@@ -218,7 +246,7 @@ impl CollectionBuilder {
         which: CabooseWhich,
         source: &str,
         caboose: SpComponentCaboose,
-    ) -> Result<(), anyhow::Error> {
+    ) -> Result<(), CollectorBug> {
         // Normalize the caboose contents: i.e., if we've seen this exact
         // caboose contents before, use the same record from before. Otherwise,
         // make a new one.
@@ -243,7 +271,7 @@ impl CollectionBuilder {
             },
         ) {
             let error = if *previous.caboose == *sw_caboose {
-                anyhow!("reported multiple times (same value)",)
+                anyhow!("reported multiple times (same value)")
             } else {
                 anyhow!(
                     "reported caboose multiple times (previously {:?}, \
@@ -252,10 +280,10 @@ impl CollectionBuilder {
                     sw_caboose
                 )
             };
-            Err(error.context(format!(
+            Err(CollectorBug::from(error.context(format!(
                 "baseboard {:?} caboose {:?}",
                 baseboard, which
-            )))
+            ))))
         } else {
             Ok(())
         }
@@ -290,7 +318,7 @@ impl CollectionBuilder {
         which: RotPageWhich,
         source: &str,
         page: RotPage,
-    ) -> Result<(), anyhow::Error> {
+    ) -> Result<(), CollectorBug> {
         // Normalize the page contents: i.e., if we've seen this exact page
         // before, use the same record from before. Otherwise, make a new one.
         let sw_rot_page = Self::normalize_item(&mut self.rot_pages, page);
@@ -321,10 +349,10 @@ impl CollectionBuilder {
                     sw_rot_page
                 )
             };
-            Err(error.context(format!(
+            Err(CollectorBug::from(error.context(format!(
                 "baseboard {:?} rot page {:?}",
                 baseboard, which
-            )))
+            ))))
         } else {
             Ok(())
         }
@@ -351,11 +379,12 @@ impl CollectionBuilder {
     /// Record a collection error
     ///
-    /// This is used for operational errors encountered during the collection
-    /// process (e.g., a down MGS instance). It's not intended for mis-uses of
-    /// this API, which are conveyed instead through returned errors (and should
-    /// probably cause the caller to stop collection altogether).
- pub fn found_error(&mut self, error: anyhow::Error) { + /// See [`InventoryError`] for more on what kinds of errors are reported + /// this way. These errors are stored as part of the collection so that + /// future readers can see what problems might make the collection + /// incomplete. By contrast, [`CollectorBug`]s are not reported and stored + /// this way. + pub fn found_error(&mut self, error: InventoryError) { self.errors.push(error); } } diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 7c6570436a..aeca6e43a1 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -5,6 +5,7 @@ //! Collection of inventory from Omicron components use crate::builder::CollectionBuilder; +use crate::builder::InventoryError; use anyhow::Context; use gateway_client::types::GetCfpaParams; use gateway_client::types::RotCfpaSlot; @@ -93,7 +94,7 @@ impl Collector { // being able to identify this particular condition. let sps = match ignition_result { Err(error) => { - self.in_progress.found_error(error); + self.in_progress.found_error(InventoryError::from(error)); return; } @@ -129,7 +130,7 @@ impl Collector { }); let sp_state = match result { Err(error) => { - self.in_progress.found_error(error); + self.in_progress.found_error(InventoryError::from(error)); continue; } Ok(response) => response.into_inner(), @@ -179,7 +180,8 @@ impl Collector { }); let caboose = match result { Err(error) => { - self.in_progress.found_error(error); + self.in_progress + .found_error(InventoryError::from(error)); continue; } Ok(response) => response.into_inner(), @@ -257,7 +259,8 @@ impl Collector { let page = match result { Err(error) => { - self.in_progress.found_error(error); + self.in_progress + .found_error(InventoryError::from(error)); continue; } Ok(data_base64) => RotPage { data_base64 }, diff --git a/nexus/inventory/src/lib.rs b/nexus/inventory/src/lib.rs index 3a5f60b387..e92c46916d 100644 --- a/nexus/inventory/src/lib.rs +++ b/nexus/inventory/src/lib.rs @@ -23,5 +23,7 @@ pub mod examples; // only exposed for test code to construct collections pub use builder::CollectionBuilder; +pub use builder::CollectorBug; +pub use builder::InventoryError; pub use collector::Collector; From 9e67e3b0d2c4e7333f35381e7e9d920e21f41851 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 11 Dec 2023 17:12:07 -0800 Subject: [PATCH 087/186] Update Rust crate libc to 0.2.151 (#4665) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a032c9680..a49ec9948a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3614,9 +3614,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.150" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libdlpi-sys" diff --git a/Cargo.toml b/Cargo.toml index 7ea34b795f..9ed182e6c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -230,7 +230,7 @@ itertools = "0.12.0" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" lazy_static = "1.4.0" -libc = "0.2.150" +libc = "0.2.151" linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" diff --git 
a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index f462fd5b6d..aa2461b980 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -63,7 +63,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] }
itertools = { version = "0.10.5" }
lalrpop-util = { version = "0.19.12" }
lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] }
-libc = { version = "0.2.150", features = ["extra_traits"] }
+libc = { version = "0.2.151", features = ["extra_traits"] }
log = { version = "0.4.20", default-features = false, features = ["std"] }
managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] }
memchr = { version = "2.6.3" }
@@ -166,7 +166,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] }
itertools = { version = "0.10.5" }
lalrpop-util = { version = "0.19.12" }
lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] }
-libc = { version = "0.2.150", features = ["extra_traits"] }
+libc = { version = "0.2.151", features = ["extra_traits"] }
log = { version = "0.4.20", default-features = false, features = ["std"] }
managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] }
memchr = { version = "2.6.3" }

From f7ee59748a9f8c51e0981e2be3c3047d433fbd49 Mon Sep 17 00:00:00 2001
From: Rain 
Date: Mon, 11 Dec 2023 23:31:12 -0800
Subject: [PATCH 088/186] [wicketd] fix additional race with test_update_races
 (#4675)

The issue here was that we'd send a notification to the test that the
task was finished, as the last thing in the task. This meant that there
was a period where `task.is_finished()` was false but the test thought
the task had finished and proceeded accordingly.

To fix this, use an `Arc<AtomicBool>` to indicate that the task is
finished.

Fixes #4668.

---
 wicketd/src/update_tracker.rs | 76 ++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 24 deletions(-)

diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs
index 7faaa08a28..336333f899 100644
--- a/wicketd/src/update_tracker.rs
+++ b/wicketd/src/update_tracker.rs
@@ -52,6 +52,7 @@ use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::io;
 use std::net::SocketAddrV6;
+use std::sync::atomic::AtomicBool;
 use std::sync::Arc;
 use std::sync::Mutex as StdMutex;
 use std::time::Duration;
@@ -93,7 +94,8 @@ use wicket_common::update_events::UpdateTerminalError;

 #[derive(Debug)]
 struct SpUpdateData {
-    task: JoinHandle<()>,
+    // See the documentation for is_finished.
+    finished: Arc<AtomicBool>,
     abort_handle: AbortHandle,
     // Note: Our mutex here is a standard mutex, not a tokio mutex. We generally
     // hold it only long enough to update its state or push a new update event
@@ -101,6 +103,20 @@ struct SpUpdateData {
     event_buffer: Arc<StdMutex<EventBuffer>>,
 }

+impl SpUpdateData {
+    /// Returns true if the update has reached a terminal state.
+    ///
+    /// To check whether an update has finished, we used to store the
+    /// JoinHandle to the task and check `task.is_finished()`. However, there
+    /// are some minor things we do after finishing the update (e.g. in the
+    /// case of a fake update, sending a message indicating that the update has
+    /// finished). So instead, we use a boolean as a flag to indicate when the
+    /// task has finished doing the bulk of its work.
+    fn is_finished(&self) -> bool {
+        self.finished.load(std::sync::atomic::Ordering::Acquire)
+    }
+}
+
 #[derive(Debug)]
 enum UploadTrampolinePhase2ToMgsStatus {
     Running { hash: ArtifactHash },
@@ -294,19 +310,10 @@ impl UpdateTracker {
                         );
                     }
                     // Occupied: we've previously started an update to this sp.
-                    Entry::Occupied(mut slot) => {
-                        assert!(
-                            slot.get().task.is_finished(),
-                            "we just checked that the task was finished"
-                        );
-                        slot.insert(
-                            spawn_update_driver
-                                .spawn_update_driver(
-                                    sp,
-                                    plan.clone(),
-                                    &setup_data,
-                                )
-                                .await,
+                    Entry::Occupied(_) => {
+                        panic!(
+                            "we just checked that there was \
+                             no update data for this SP"
                         );
                     }
                 }
@@ -497,20 +504,25 @@ impl<'tr> SpawnUpdateDriver for RealSpawnUpdateDriver<'tr> {
        // ideal, but it works and is the easiest way to send it without
        // restructuring this code.
        let (abort_handle_sender, abort_handle_receiver) = oneshot::channel();
-        let task = tokio::spawn(update_driver.run(
+
+        let finished = Arc::new(AtomicBool::new(false));
+        let finished_indicator = SetTrueOnDrop(finished.clone());
+
+        tokio::spawn(update_driver.run(
            plan,
            update_cx,
            event_buffer.clone(),
            ipr_start_receiver,
            self.opts.clone(),
            abort_handle_sender,
+            finished_indicator,
        ));

        let abort_handle = abort_handle_receiver
            .await
            .expect("abort handle is sent immediately");

-        SpUpdateData { task, abort_handle, event_buffer }
+        SpUpdateData { finished, abort_handle, event_buffer }
    }
 }

@@ -549,7 +561,10 @@ impl SpawnUpdateDriver for FakeUpdateDriver {
            .take()
            .expect("fake step receiver is only taken once");

-        let task = tokio::spawn(async move {
+        let finished = Arc::new(AtomicBool::new(false));
+        let finished_indicator = SetTrueOnDrop(finished.clone());
+
+        tokio::spawn(async move {
            // The step component and ID have been chosen arbitrarily here --
            // they aren't important.
            let final_sender_handle = engine
@@ -579,6 +594,9 @@ impl SpawnUpdateDriver for FakeUpdateDriver {
            // buffer.
            event_receiving_task.await.expect("event receiving task panicked");

+            // Indicate to the outside world that the update is finished.
+            std::mem::drop(finished_indicator);
+
            // Finally, notify the receiving end of the inner sender: this
            // indicates that the update is done.
            match engine_res {
@@ -605,7 +623,7 @@ impl SpawnUpdateDriver for FakeUpdateDriver {
            }
        });

-        SpUpdateData { task, abort_handle, event_buffer }
+        SpUpdateData { finished, abort_handle, event_buffer }
    }
 }

@@ -652,9 +670,7 @@ impl UpdateTrackerData {
            .filter_map(|sp| {
                self.sp_update_data
                    .get(sp)
-                    .map_or(false, |update_data| {
-                        !update_data.task.is_finished()
-                    })
+                    .map_or(false, |update_data| !update_data.is_finished())
                    .then(|| *sp)
            })
            .collect::<Vec<_>>();
@@ -692,7 +708,7 @@ impl UpdateTrackerData {
        // There's a race possible here between the task finishing and this
        // check, but that's totally fine: the worst case is that the abort is
        // ignored.
-        if update_data.task.is_finished() {
+        if update_data.is_finished() {
            return Err(AbortUpdateError::UpdateFinished);
        }

@@ -719,13 +735,13 @@ impl UpdateTrackerData {
            .sp_update_data
            .iter()
            .filter_map(|(sp_identifier, update_data)| {
-                (!update_data.task.is_finished()).then(|| *sp_identifier)
+                (!update_data.is_finished()).then(|| *sp_identifier)
            })
            .collect::<Vec<_>>();
        if !running_sps.is_empty() {
            return Err(HttpError::for_bad_request(
                None,
-                "Updates currently running for {running_sps:?}".to_owned(),
+                format!("Updates currently running for {running_sps:?}"),
            ));
        }

@@ -787,10 +803,19 @@ impl AbortUpdateError {
    }
 }

+struct SetTrueOnDrop(Arc<AtomicBool>);
+
+impl Drop for SetTrueOnDrop {
+    fn drop(&mut self) {
+        self.0.store(true, std::sync::atomic::Ordering::Release);
+    }
+}
+
 #[derive(Debug)]
 struct UpdateDriver {}

 impl UpdateDriver {
+    #![allow(clippy::too_many_arguments)]
    async fn run(
        self,
        plan: UpdatePlan,
@@ -799,6 +824,7 @@ impl UpdateDriver {
        ipr_start_receiver: IprStartReceiver,
        opts: StartUpdateOptions,
        abort_handle_sender: oneshot::Sender<AbortHandle>,
+        finished_indicator: SetTrueOnDrop,
    ) {
        let update_cx = &update_cx;
@@ -1066,6 +1092,8 @@ impl UpdateDriver {

        // Wait for all events to be received and written to the update log.
        event_receiving_task.await.expect("event receiving task panicked");
+        // This would happen anyway, but be explicit about the drop.
+        std::mem::drop(finished_indicator);
    }

    fn register_sled_steps<'a>(

From 027c9b854286d67d5f9ab294962f1b3adaaecb0b Mon Sep 17 00:00:00 2001
From: bnaecker 
Date: Tue, 12 Dec 2023 10:20:40 -0800
Subject: [PATCH 089/186] Oximeter self-stat tests don't need to be based on
 time (#4670)

- Fixes #4657
- Oximeter self stat tests run a few collections, and verify that the
  relevant counters keep track of those. Previously, that was based on
  _time_, advancing time forward until the expected number of
  collections actually occur. There's nothing really guaranteeing that
  those collections do occur, since the tasks may not run exactly when
  we expect. This commit updates the tests so that the producer
  explicitly updates a shared counter every time they receive a
  request. The tests then reduce to asserting this counter equals the
  self-stat counter.

---
 oximeter/collector/src/agent.rs | 54 ++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs
index 4135125a48..8fff44bb2d 100644
--- a/oximeter/collector/src/agent.rs
+++ b/oximeter/collector/src/agent.rs
@@ -654,6 +654,8 @@ mod tests {
     use std::net::Ipv6Addr;
     use std::net::SocketAddr;
     use std::net::SocketAddrV6;
+    use std::sync::atomic::AtomicU64;
+    use std::sync::atomic::Ordering;
     use std::time::Duration;
     use tokio::sync::oneshot;
     use tokio::time::Instant;
@@ -667,7 +669,8 @@ mod tests {
     // timers complete as expected.
     const TICK_INTERVAL: Duration = Duration::from_millis(10);

-    // Total number of collection attempts.
+    // Total number of collection attempts, and the expected number of
+    // collections which fail in the "unreachability" test below.
     const N_COLLECTIONS: u64 = 5;

     // Period these tests wait using `tokio::time::advance()` before checking
@@ -677,6 +680,12 @@ mod tests {
             + COLLECTION_INTERVAL.as_millis() as u64 / 2,
     );

+    // The number of actual successful test collections.
+    static N_SUCCESSFUL_COLLECTIONS: AtomicU64 = AtomicU64::new(0);
+
+    // The number of actual failed test collections.
+    static N_FAILED_COLLECTIONS: AtomicU64 = AtomicU64::new(0);
+
     // Test that we count successful collections from a target correctly.
     #[tokio::test]
     async fn test_self_stat_collection_count() {
@@ -697,13 +706,11 @@ mod tests {
        // will be no actual data here, but the sample counter will increment.
        let addr =
            SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0));
-        async fn handler(
-            _: Request<Body>,
-        ) -> Result<Response<Body>, Infallible> {
-            Ok(Response::new(Body::from("[]")))
-        }
        let make_svc = make_service_fn(|_conn| async {
-            Ok::<_, Infallible>(service_fn(handler))
+            Ok::<_, Infallible>(service_fn(|_: Request<Body>| async {
+                N_SUCCESSFUL_COLLECTIONS.fetch_add(1, Ordering::SeqCst);
+                Ok::<_, Infallible>(Response::new(Body::from("[]")))
+            }))
        });
        let server = Server::bind(&addr).serve(make_svc);
        let address = server.local_addr();
@@ -722,7 +729,11 @@ mod tests {
            .await
            .expect("failed to register dummy producer");

-        // Step time until there has been exactly `N_COLLECTIONS` collections.
+        // Step time for a few collections.
+        //
+        // Due to scheduling variations, we don't verify the number of
+        // collections we expect based on time, but we instead check that every
+        // collection that _has_ occurred bumps the counter.
        tokio::time::pause();
        let now = Instant::now();
        while now.elapsed() < TEST_WAIT_PERIOD {
@@ -744,7 +755,10 @@ mod tests {
            .await
            .expect("failed to request statistics from task");
        let stats = rx.await.expect("failed to receive statistics from task");
-        assert_eq!(stats.collections.datum.value(), N_COLLECTIONS);
+        assert_eq!(
+            stats.collections.datum.value(),
+            N_SUCCESSFUL_COLLECTIONS.load(Ordering::SeqCst)
+        );
        assert!(stats.failed_collections.is_empty());
        logctx.cleanup_successful();
    }
@@ -837,15 +851,13 @@ mod tests {
        // And a dummy server that will always fail with a 500.
        let addr =
            SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0));
-        async fn handler(
-            _: Request<Body>,
-        ) -> Result<Response<Body>, Infallible> {
-            let mut res = Response::new(Body::from("im ded"));
-            *res.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
-            Ok(res)
-        }
        let make_svc = make_service_fn(|_conn| async {
-            Ok::<_, Infallible>(service_fn(handler))
+            Ok::<_, Infallible>(service_fn(|_: Request<Body>| async {
+                N_FAILED_COLLECTIONS.fetch_add(1, Ordering::SeqCst);
+                let mut res = Response::new(Body::from("im ded"));
+                *res.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
+                Ok::<_, Infallible>(res)
+            }))
        });
        let server = Server::bind(&addr).serve(make_svc);
        let address = server.local_addr();
@@ -865,6 +877,12 @@ mod tests {
            .expect("failed to register flaky producer");

        // Step time until there has been exactly `N_COLLECTIONS` collections.
+        //
+        // NOTE: This is technically still a bit racy, in that the server task
+        // may have made a different number of attempts than we expect. In
+        // practice, we've not seen this one fail, so basing the number of
+        // counts on time seems reasonable, especially since we don't have other
+        // low-cost options for verifying the behavior.
        tokio::time::pause();
        let now = Instant::now();
        while now.elapsed() < TEST_WAIT_PERIOD {
@@ -894,7 +912,7 @@ mod tests {
                .unwrap()
                .datum
                .value(),
-            N_COLLECTIONS,
+            N_FAILED_COLLECTIONS.load(Ordering::SeqCst),
        );
        assert_eq!(stats.failed_collections.len(), 1);
        logctx.cleanup_successful();

From 07eb7dafc20e35e44edf429fcbb759cbb33edd5f Mon Sep 17 00:00:00 2001
From: David Crespo 
Date: Tue, 12 Dec 2023 13:02:51 -0600
Subject: [PATCH 090/186] Fix oximeter test flake due to timestamp collision
 (#4679)

Closes #4678

---
 oximeter/db/src/client.rs | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs
index d295d0dcdf..299af33fb7 100644
--- a/oximeter/db/src/client.rs
+++ b/oximeter/db/src/client.rs
@@ -3145,12 +3145,18 @@ mod tests {
        serde_json::Value: From<T>,
    {
        let datum = Datum::from(hist);
-        let measurement = Measurement::new(Utc::now(), datum);
+
+        // We artificially give different timestamps to avoid a test flake in
+        // CI (reproducible reliably on macOS) where the two Utc::now() are the
+        // same, which means we get two results on retrieval when we expect one
+        let t1 = Utc::now();
+        let t2 = t1 + Duration::from_nanos(1);
+
+        let measurement = Measurement::new(t1, datum);
        let missing_datum = Datum::Missing(
-            MissingDatum::new(measurement.datum_type(), Some(Utc::now()))
-                .unwrap(),
+            MissingDatum::new(measurement.datum_type(), Some(t2)).unwrap(),
        );
-        let missing_measurement = Measurement::new(Utc::now(), missing_datum);
+        let missing_measurement = Measurement::new(t2, missing_datum);
        test_recall_measurement_impl(measurement, client).await?;
        test_recall_measurement_impl(missing_measurement, client).await?;
        Ok(()

From 47065ac1cf1e7ab45f265edd30cb02568588661f Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 11:53:36 -0800
Subject: [PATCH 091/186] Update Rust crate vsss-rs to 3.3.2 (#4676)

---
 Cargo.lock | 8 ++++----
 bootstore/Cargo.toml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a49ec9948a..3eb87ade1f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1385,9 +1385,9 @@ dependencies = [
[[package]]
name = "curve25519-dalek"
-version = "4.1.0"
+version = "4.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "622178105f911d937a42cdb140730ba4a3ed2becd8ae6ce39c7d28b5d75d4588"
+checksum = "e89b8c6a2e4b1f45971ad09761aafb85514a84744b67a95e32c3cc1352d1f65c"
dependencies = [
 "cfg-if",
 "cpufeatures",
@@ -9355,9 +9355,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "vsss-rs"
-version = "3.3.1"
+version = "3.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f368a01a79af8f2fa45e20a2a478a9799c631c4a7c598563e2c94b2211f750cb"
+checksum = "a18cf462590a38451396633ef9771f3e9afcf439309137fd6c6eaaec0fb38252"
dependencies = [
 "curve25519-dalek",
 "elliptic-curve",
diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml
index 93eb6a3c48..1eade5afe8 100644
--- a/bootstore/Cargo.toml
+++ b/bootstore/Cargo.toml
@@ -27,7 +27,7 @@ slog.workspace = true
thiserror.workspace = true
tokio.workspace = true
uuid.workspace = true
-vsss-rs = { version = "3.3.1", features = ["std", "curve25519"] }
+vsss-rs = { version = "3.3.2", features = ["std", "curve25519"] }
zeroize.workspace = true

# See omicron-rpaths for more about the "pq-sys" dependency.
From 76a35f25ac360c67cff2729aa3d735699fc49130 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 12 Dec 2023 13:57:27 -0800 Subject: [PATCH 092/186] Update Rust crate mockall to 0.12 (#4663) --- Cargo.lock | 32 +++++++++----------------------- Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3eb87ade1f..d9b0f91d5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -237,7 +237,7 @@ dependencies = [ "anstyle", "bstr 1.6.0", "doc-comment", - "predicates 3.0.4", + "predicates", "predicates-core", "predicates-tree", "wait-timeout", @@ -3937,29 +3937,29 @@ dependencies = [ [[package]] name = "mockall" -version = "0.11.4" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c84490118f2ee2d74570d114f3d0493cbf02790df303d2707606c3e14e07c96" +checksum = "1a978c8292954bcb9347a4e28772c0a0621166a1598fc1be28ac0076a4bb810e" dependencies = [ "cfg-if", "downcast", "fragile", "lazy_static", "mockall_derive", - "predicates 2.1.5", + "predicates", "predicates-tree", ] [[package]] name = "mockall_derive" -version = "0.11.4" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ce75669015c4f47b289fd4d4f56e894e4c96003ffdf3ac51313126f94c6cbb" +checksum = "ad2765371d0978ba4ace4ebef047baa62fc068b431e468444b5610dd441c639b" dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.32", ] [[package]] @@ -5084,7 +5084,7 @@ dependencies = [ "petgraph", "postgres-types", "ppv-lite86", - "predicates 3.0.4", + "predicates", "proc-macro2", "rand 0.8.5", "rand_chacha 0.3.1", @@ -6030,20 +6030,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" -[[package]] -name = "predicates" -version = "2.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" -dependencies = [ - "difflib", - "float-cmp", - "itertools 0.10.5", - "normalize-line-endings", - "predicates-core", - "regex", -] - [[package]] name = "predicates" version = "3.0.4" @@ -8982,7 +8968,7 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "predicates 3.0.4", + "predicates", "slog", "slog-async", "slog-envlogger", diff --git a/Cargo.toml b/Cargo.toml index 9ed182e6c3..9fc450878b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -235,7 +235,7 @@ linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mime_guess = "2.0.4" -mockall = "0.11" +mockall = "0.12" newtype_derive = "0.1.6" mg-admin-client = { path = "clients/mg-admin-client" } nexus-client = { path = "clients/nexus-client" } From 343835c1b2099e50205fb41be1341c2f58dd43ec Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 12 Dec 2023 14:15:42 -0800 Subject: [PATCH 093/186] [nexus] Allow silo admins to upload new certs (#4669) The additional cert validation added in #4100 broke the ability for silo admins to upload new certs, because it introduced a call to fetch the rack DNS configuration (in order to assemble the FQDNs for the silo to check that the cert is valid for them). This PR fixes that by using an elevated internal privilege for that DNS config lookup. Fixes #4532. 
--- nexus/src/app/certificate.rs | 18 +++++- nexus/tests/integration_tests/silos.rs | 82 ++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/certificate.rs b/nexus/src/app/certificate.rs index 71be93f5b7..2f130d1ad3 100644 --- a/nexus/src/app/certificate.rs +++ b/nexus/src/app/certificate.rs @@ -47,8 +47,22 @@ impl super::Nexus { .silo_required() .internal_context("creating a Certificate")?; - let silo_fq_dns_names = - self.silo_fq_dns_names(opctx, authz_silo.id()).await?; + // The `opctx` we received is going to be checked for permission to + // create a cert below in `db_datastore.certificate_create`, but first + // we need to look up this silo's fully-qualified domain names in order + // to check that the cert we've been given is valid for this silo. + // Looking up DNS names requires reading the DNS configuration of the + // _rack_, which this user may not be able to do (even if they have + // permission to upload new certs, which almost certainly implies a + // silo-level admin). We'll use our `opctx_external_authn()` context, + // which is the same context used to create a silo. This is a higher + // privilege than the current user may have, but we believe it does not + // leak any information that a silo admin doesn't already know (the + // external DNS name(s) of the rack, which leads to their silo's DNS + // name(s)). + let silo_fq_dns_names = self + .silo_fq_dns_names(self.opctx_external_authn(), authz_silo.id()) + .await?; let kind = params.service; let new_certificate = db::model::Certificate::new( diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index e5f41d294d..3c69c8b7cd 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use crate::integration_tests::saml::SAML_IDP_DESCRIPTOR; +use dropshot::ResultsPage; use nexus_db_queries::authn::silos::{ AuthenticatedSubject, IdentityProviderType, }; @@ -19,6 +20,7 @@ use nexus_test_utils::resource_helpers::{ objects_list_page_authz, projects_list, }; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::views::Certificate; use nexus_types::external_api::views::{ self, IdentityProvider, Project, SamlIdentityProvider, Silo, }; @@ -27,6 +29,7 @@ use omicron_common::api::external::ObjectIdentity; use omicron_common::api::external::{ IdentityMetadataCreateParams, LookupType, Name, }; +use omicron_test_utils::certificates::CertificateChain; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use std::collections::{BTreeMap, BTreeSet, HashSet}; @@ -2437,3 +2440,82 @@ async fn check_fleet_privileges( .unwrap(); } } + +// Test that a silo admin can create new certificates for their silo +// +// Internally, the certificate validation check requires the `authz::DNS_CONFIG` +// resource (to check that the certificate is valid for +// `{silo_name}.{external_dns_zone_name}`), which silo admins may not have. We +// have to use an alternate, elevated context to perform that check, and this +// test confirms we do so. 
+#[nexus_test]
+async fn test_silo_admin_can_create_certs(cptestctx: &ControlPlaneTestContext) {
+    let client = &cptestctx.external_client;
+    let certs_url = "/v1/certificates";
+
+    // Create a silo with an admin user
+    let silo = create_silo(
+        client,
+        "silo-name",
+        true,
+        shared::SiloIdentityMode::LocalOnly,
+    )
+    .await;
+
+    let new_silo_user_id = create_local_user(
+        client,
+        &silo,
+        &"admin".parse().unwrap(),
+        params::UserPassword::LoginDisallowed,
+    )
+    .await
+    .id;
+
+    grant_iam(
+        client,
+        "/v1/system/silos/silo-name",
+        SiloRole::Admin,
+        new_silo_user_id,
+        AuthnMode::PrivilegedUser,
+    )
+    .await;
+
+    // The user should be able to create certs for this silo
+    let chain = CertificateChain::new(cptestctx.wildcard_silo_dns_name());
+    let (cert, key) =
+        (chain.cert_chain_as_pem(), chain.end_cert_private_key_as_pem());
+
+    let cert: Certificate = NexusRequest::objects_post(
+        client,
+        certs_url,
+        &params::CertificateCreate {
+            identity: IdentityMetadataCreateParams {
+                name: "test-cert".parse().unwrap(),
+                description: "the test cert".to_string(),
+            },
+            cert,
+            key,
+            service: shared::ServiceUsingCertificate::ExternalApi,
+        },
+    )
+    .authn_as(AuthnMode::SiloUser(new_silo_user_id))
+    .execute()
+    .await
+    .expect("failed to create certificate")
+    .parsed_body()
+    .unwrap();
+
+    // The cert should exist when listing the silo's certs as the silo admin
+    let silo_certs =
+        NexusRequest::object_get(client, &format!("{certs_url}?limit=10"))
+            .authn_as(AuthnMode::SiloUser(new_silo_user_id))
+            .execute()
+            .await
+            .expect("failed to list certificates")
+            .parsed_body::<ResultsPage<Certificate>>()
+            .expect("failed to parse body as ResultsPage")
+            .items;
+
+    assert_eq!(silo_certs.len(), 1);
+    assert_eq!(silo_certs[0].identity.id, cert.identity.id);
+}

From 2d95aacd92dfd5f8f77a504ab3165fb328f4b5f7 Mon Sep 17 00:00:00 2001
From: bnaecker 
Date: Tue, 12 Dec 2023 15:40:55 -0800
Subject: [PATCH 094/186] Support a restricted SQL subset for querying
 timeseries (#4475)

- Add methods for querying oximeter timeseries using a limited subset
  of SQL. The raw string is heavily validated, and only simple SELECT
  queries are currently supported, with a limited subset of ClickHouse
  functions. Still, this allows running many different kinds of
  queries, including aggregations, window functions, and joins.
- Adds a few types for reading out tabular data from the query result,
  and parsing basic metadata for understanding resource usage of the
  queries.
- Add a barebones SQL shell for running oximeter SQL queries, using the
  new `Client::query` method (see the example below).
- Include a bunch of tests for the restricted SQL subset as well as
  correctness of the actual returned queries against the DB.
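For a rough illustration, the shell accepts queries along these lines
(illustrative only; `physical_data_link:bytes_sent` is one of the timeseries
used as an example in the new README in this change):

    SELECT link_name, max(datum)
    FROM physical_data_link:bytes_sent
    GROUP BY link_name;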
--- Cargo.lock | 157 ++- Cargo.toml | 3 + oximeter/db/Cargo.toml | 29 +- oximeter/db/README-oxdb-sql.md | 219 ++++ oximeter/db/src/bin/oxdb.rs | 311 ++++- oximeter/db/src/client.rs | 294 ++++- oximeter/db/src/lib.rs | 39 +- oximeter/db/src/query.rs | 2 + oximeter/db/src/sql/mod.rs | 1358 +++++++++++++++++++++ oximeter/db/test-output/sql/00/query.sql | 1 + oximeter/db/test-output/sql/00/result.txt | 10 + oximeter/db/test-output/sql/01/query.sql | 1 + oximeter/db/test-output/sql/01/result.txt | 10 + oximeter/db/test-output/sql/02/query.sql | 1 + oximeter/db/test-output/sql/02/result.txt | 10 + oximeter/db/test-output/sql/03/query.sql | 4 + oximeter/db/test-output/sql/03/result.txt | 43 + oximeter/db/test-output/sql/04/query.sql | 5 + oximeter/db/test-output/sql/04/result.txt | 56 + oximeter/oximeter/src/schema.rs | 3 +- oximeter/oximeter/src/types.rs | 5 + wicket-dbg/Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 +- 23 files changed, 2510 insertions(+), 57 deletions(-) create mode 100644 oximeter/db/README-oxdb-sql.md create mode 100644 oximeter/db/src/sql/mod.rs create mode 100644 oximeter/db/test-output/sql/00/query.sql create mode 100644 oximeter/db/test-output/sql/00/result.txt create mode 100644 oximeter/db/test-output/sql/01/query.sql create mode 100644 oximeter/db/test-output/sql/01/result.txt create mode 100644 oximeter/db/test-output/sql/02/query.sql create mode 100644 oximeter/db/test-output/sql/02/result.txt create mode 100644 oximeter/db/test-output/sql/03/query.sql create mode 100644 oximeter/db/test-output/sql/03/result.txt create mode 100644 oximeter/db/test-output/sql/04/query.sql create mode 100644 oximeter/db/test-output/sql/04/result.txt diff --git a/Cargo.lock b/Cargo.lock index d9b0f91d5e..7bf813233c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -213,6 +213,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "arrayvec" version = "0.7.4" @@ -557,7 +563,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c2f0dc9a68c6317d884f97cc36cf5a3d20ba14ce404227df55e1af708ab04bc" dependencies = [ "arrayref", - "arrayvec", + "arrayvec 0.7.4", "constant_time_eq 0.2.6", ] @@ -1003,6 +1009,17 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" +[[package]] +name = "clipboard-win" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" +dependencies = [ + "error-code", + "str-buf", + "winapi", +] + [[package]] name = "cobs" version = "0.2.3" @@ -1257,6 +1274,23 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossterm" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" +dependencies = [ + "bitflags 1.3.2", + "crossterm_winapi", + "libc", + "mio", + "parking_lot 0.12.1", + "serde", + "signal-hook", + "signal-hook-mio", + "winapi", +] + [[package]] name = "crossterm" version = "0.27.0" @@ -1269,7 +1303,6 @@ dependencies = [ "libc", "mio", "parking_lot 0.12.1", - "serde", "signal-hook", 
"signal-hook-mio", "winapi", @@ -2161,6 +2194,16 @@ dependencies = [ "libc", ] +[[package]] +name = "error-code" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" +dependencies = [ + "libc", + "str-buf", +] + [[package]] name = "expectorate" version = "1.1.0" @@ -2547,7 +2590,7 @@ dependencies = [ "hubpack 0.1.2", "hubtools", "lru-cache", - "nix", + "nix 0.26.2", "once_cell", "paste", "serde", @@ -4303,6 +4346,17 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nodrop" version = "0.1.14" @@ -5037,7 +5091,6 @@ dependencies = [ "const-oid", "crossbeam-epoch", "crossbeam-utils", - "crossterm", "crypto-common", "der", "diesel", @@ -5074,6 +5127,7 @@ dependencies = [ "managed", "memchr", "mio", + "nom", "num-bigint", "num-integer", "num-iter", @@ -5444,14 +5498,18 @@ dependencies = [ "clap 4.4.3", "dropshot", "expectorate", + "futures", "highway", + "indexmap 2.1.0", "itertools 0.12.0", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "reedline", "regex", "reqwest", + "rustyline", "schemars", "serde", "serde_json", @@ -5459,7 +5517,10 @@ dependencies = [ "slog-async", "slog-dtrace", "slog-term", + "sqlformat", + "sqlparser", "strum", + "tabled", "tempfile", "thiserror", "tokio", @@ -6457,7 +6518,7 @@ checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad" dependencies = [ "bitflags 2.4.0", "cassowary", - "crossterm", + "crossterm 0.27.0", "indoc 2.0.3", "itertools 0.11.0", "paste", @@ -6547,12 +6608,12 @@ dependencies = [ [[package]] name = "reedline" -version = "0.26.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a093a20a6c473247c2e9971aaf4cedf9041bcd3f444dc7fad667d3b6b7a5fd" +checksum = "c2fde955d11817fdcb79d703932fb6b473192cb36b6a92ba21f7f4ac0513374e" dependencies = [ "chrono", - "crossterm", + "crossterm 0.26.1", "fd-lock", "itertools 0.10.5", "nu-ansi-term", @@ -7087,6 +7148,29 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rustyline" +version = "12.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" +dependencies = [ + "bitflags 2.4.0", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix 0.26.4", + "radix_trie", + "scopeguard", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "winapi", +] + [[package]] name = "ryu" version = "1.0.15" @@ -7981,6 +8065,38 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "sqlformat" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" +dependencies = [ + "itertools 0.12.0", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlparser" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -8015,6 +8131,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "str-buf" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" + [[package]] name = "string_cache" version = "0.8.7" @@ -8041,9 +8163,9 @@ dependencies = [ [[package]] name = "strip-ansi-escapes" -version = "0.2.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55ff8ef943b384c414f54aefa961dd2bd853add74ec75e7ac74cf91dba62bcfa" +checksum = "011cbb39cf7c1f62871aea3cc46e5817b0937b49e9447370c93cacbe93a766d8" dependencies = [ "vte", ] @@ -9163,6 +9285,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "universal-hash" version = "0.5.1" @@ -9359,10 +9487,11 @@ dependencies = [ [[package]] name = "vte" -version = "0.11.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197" +checksum = "6cbce692ab4ca2f1f3047fcf732430249c0e971bfdd2b234cf2c47ad93af5983" dependencies = [ + "arrayvec 0.5.2", "utf8parse", "vte_generate_state_changes", ] @@ -9552,7 +9681,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm", + "crossterm 0.27.0", "futures", "humantime", "indexmap 2.1.0", @@ -9613,7 +9742,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm", + "crossterm 0.27.0", "omicron-workspace-hack", "reedline", "serde", diff --git a/Cargo.toml b/Cargo.toml index 9fc450878b..2ce3bcafb4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -302,6 +302,7 @@ rand = "0.8.5" ratatui = "0.23.0" rayon = "1.8" rcgen = "0.11.3" +reedline = "0.22.0" ref-cast = "1.0" regex = "1.10.2" regress = "0.7.1" @@ -311,6 +312,7 @@ rpassword = "7.3.1" rstest = "0.18.2" rustfmt-wrapper = "0.2" rustls = "0.21.9" +rustyline = "12.0.0" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } schemars = "0.8.12" secrecy = "0.8.0" @@ -344,6 +346,7 @@ sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } +sqlparser = { version = "0.36.1", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. 
It makes it # harder than expected to make breaking changes (even if you specify a specific diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index 4d53869d0d..99985a3b80 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -9,27 +9,46 @@ license = "MPL-2.0" anyhow.workspace = true async-trait.workspace = true bcs.workspace = true -bytes = { workspace = true, features = [ "serde" ] } camino.workspace = true chrono.workspace = true clap.workspace = true dropshot.workspace = true +futures.workspace = true highway.workspace = true +indexmap.workspace = true omicron-common.workspace = true +omicron-workspace-hack.workspace = true oximeter.workspace = true +reedline.workspace = true regex.workspace = true -reqwest = { workspace = true, features = [ "json" ] } -schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } +rustyline.workspace = true serde.workspace = true serde_json.workspace = true slog.workspace = true slog-async.workspace = true slog-term.workspace = true +sqlparser.workspace = true +sqlformat = "0.2.2" +tabled.workspace = true thiserror.workspace = true -tokio = { workspace = true, features = [ "rt-multi-thread", "macros" ] } usdt.workspace = true uuid.workspace = true -omicron-workspace-hack.workspace = true + +[dependencies.bytes] +workspace = true +features = [ "serde" ] + +[dependencies.reqwest] +workspace = true +features = [ "json" ] + +[dependencies.schemars] +workspace = true +features = [ "uuid1", "bytes", "chrono" ] + +[dependencies.tokio] +workspace = true +features = [ "rt-multi-thread", "macros" ] [dev-dependencies] expectorate.workspace = true diff --git a/oximeter/db/README-oxdb-sql.md b/oximeter/db/README-oxdb-sql.md new file mode 100644 index 0000000000..8ebecdd409 --- /dev/null +++ b/oximeter/db/README-oxdb-sql.md @@ -0,0 +1,219 @@ +# `oxdb sql` + +This is a short how-to for using SQL to query timeseries. If you're eager to get +started, find a ClickHouse server with `oximeter` data, and run: + +```console +oxdb --address $CLICKHOUSE_ADDR sql +``` + +You can use `help` to get a help menu on the CLI, or run `\l` to list available +timeseries to start querying. + +## `oximeter` overview + +In general, `oximeter`'s architecture and data model are laid out in RFDs 161 +and 162. These provide a good detailed look at the system. + +### Terminology + +`oximeter` is the subsystem for describing, collecting, and storing telemetry +data from the Oxide rack. Software components make data available to an +`oximeter` collector in the form of _samples_, which are timestamped datapoints +from a single timeseries. + +Timeseries are named for their _target_, the component being measured or +monitored, and the _metric_, the measured feature or aspect of the target. The +timeseries name is derived as `target_name:metric_name`. The target and metric +can both have name-value pairs called _fields_, and the metric additionally has +a _measurement_, the actual measured value. Both are strongly typed. + +### Data normalization + +As samples are collected, `oximeter` normalizes them before storing in +ClickHouse. The database consists of a set of tables for fields and +measurements, with each _type_ stored in a different table. For fields, the name +of the field is also stored; for measurements, the timestamp and actual datum +are stored. Additionally, one table stores all the received _timeseries schema_, +which describes the name, fields, and measurement types for each timeseries. + +Normalizing the tables has many benefits. 
+Less duplicated data is stored; table arrangements are simpler and more static;
+compression is better; and more. It does have drawbacks, though. Querying
+becomes especially tricky, because one needs to join many tables together to
+reconstitute the original samples. This is exacerbated by ClickHouse's lack of
+unique primary keys, which means we need to generate a tag used to associate
+records from a single timeseries. These are called _timeseries keys_, and are
+just hashes computed when a sample is received.
+
+## Oximeter SQL
+
+While writing the full set of join expressions needed to denormalize samples is
+not very human-friendly, it _is_ relatively easy to generate these in code.
+Using the stored timeseries schema and timeseries keys, one can write a (huge)
+join expression that results in the full timeseries _as if_ it were a real table
+in the database. `oxdb sql` generates this expression, and then runs whatever
+query the user supplied on _the resulting in-memory table_.
+
+### Basic commands
+
+After starting the SQL shell with `oxdb sql`, one can run the following basic
+operations:
+
+- `\h` or `help` will print a _help_ menu
+- `\d <timeseries>` will _describe_ the schema of a single named timeseries
+- `\l` will _list_ all available timeseries by name
+- `\f` will list supported ClickHouse functions and `\f <function>` will print
+  more details about the function and its usage
+
+### SQL
+
+In general, normal ANSI SQL is supported. Instead of a _table_, however, one
+queries against a _timeseries_. For example:
+
+```sql
+SELECT count() FROM physical_data_link:bytes_received;
+```
+
+This will return the total number of samples in the timeseries representing the
+number of bytes received on an Ethernet data link on a Gimlet. Here are the
+available fields:
+
+```console
+0x〉\d physical_data_link:bytes_received
+ hostname | link_name | rack_id | serial | sled_id | timestamp | start_time | datum
+----------+-----------+---------+--------+---------+------------+------------+---------------
+ String | String | Uuid | String | Uuid | DateTime64 | DateTime64 | CumulativeU64
+```
+
+Any of these fields can be queried, including aggregations, groupings, etc.
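+For instance, one might filter on a field while aggregating the datum. This is
+a sketch (the serial number below is a made-up placeholder, not real data):
+
+```sql
+-- Largest cumulative byte count seen on each link of a single sled,
+-- identified by its serial number.
+SELECT link_name, max(datum)
+FROM physical_data_link:bytes_received
+WHERE serial = 'BRM00000000'
+GROUP BY link_name;
+```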
+
+```console
+0x〉select min(timestamp), max(timestamp) from physical_data_link:bytes_received;
+
+ min(timestamp) | max(timestamp)
+---------------------------------+---------------------------------
+ "2023-11-09 04:24:53.284336528" | "2023-11-09 22:12:58.986751414"
+
+Metadata
+ Query ID: 66e68db5-8792-4e48-af2d-e5a2a117ab0d
+ Result rows: 1
+ Time: 72.371047ms
+ Read: 19736 rows (1292186 bytes)
+
+```
+
+or
+
+```console
+0x〉select distinct route from http_service:request_latency_histogram where name = 'nexus-internal';
+
+ route
+-------------------------------------------------------------------------------------------------
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/d462a7f7-b628-40fe-80ff-4e4189e2d62b"
+ "/metrics/producers"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/616b26df-e62a-4c68-b506-f4a923d8aaf7"
+ "/metrics/collect/1e9a8843-2327-4d59-94b2-14f909b6f207"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/f4b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/a462a7f7-b628-40fe-80ff-4e4189e2d62b"
+ "/metrics/collect/4b795850-8320-4b7d-9048-aa277653ab8e"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/14b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/e4b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/cd70d7f6-2354-4bf2-8012-55bf9eaf7930"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/31bd71cd-4736-4a12-a387-9b74b050396f"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/b462a7f7-b628-40fe-80ff-4e4189e2d62b"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/24b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/physical-disk"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/ceb4461c-cf56-4719-ad3c-14430bfdfb60"
+ "/metrics/collect/5c4f4629-1325-4123-bdcd-01bc9a18d740"
+ "/metrics/collectors"
+
+Metadata
+ Query ID: 3107c7ca-6906-4ce9-9d57-19cf0c1d6c71
+ Result rows: 18
+ Time: 119.387667ms
+ Read: 206840 rows (14749196 bytes)
+
+```
+
+or
+
+```console
+0x〉select link_name, formatReadableSize(max(datum)) from physical_data_link:bytes_sent group by link_name;
+
+ link_name | formatReadableSize(max(datum))
+-----------+--------------------------------
+ "net0" | "5.96 MiB"
+ "net1" | "0.00 B"
+
+Metadata
+ Query ID: cd101b14-a91e-419b-b2d0-633047db219e
+ Result rows: 2
+ Time: 56.036663ms
+ Read: 27025 rows (1558430 bytes)
+
+```
+
+> Note the metadata at the bottom. The query ID is assigned by the server, which
+> also returns the number of rows / bytes read. The _time_ includes the server
+> processing time and the network time, and is usually dominated by the latter.
+
+### JOINs
+
+SQL joins are also supported, as long as they are either _inner joins_ or the
+ClickHouse-specific _asof join_. Inner joins are pretty standard, but `ASOF
+JOIN` is unique and very useful. It provides a way to match up rows that do not
+have an _exact_ equal in each table. As an example, we can use this to match up
+metrics from different timeseries:
+
+```console
+0x〉select timestamp, datum as bytes_received, s.timestamp, s.datum as bytes_sent from physical_data_link:bytes_received asof join physical_data_link:bytes_sent as s using (link_name, timestamp) where link_name = 'net0' limit 10;
+
+ timestamp | bytes_received | s.timestamp | bytes_sent
+---------------------------------+----------------+---------------------------------+------------
+ "2023-11-09 04:24:53.284336528" | 0 | "2023-11-09 04:24:53.284336528" | 10661
+ "2023-11-09 04:25:06.960255374" | 1064 | "2023-11-09 04:25:06.960255374" | 11937
+ "2023-11-09 04:25:16.962286001" | 3748 | "2023-11-09 04:25:16.962286001" | 15910
+ "2023-11-09 04:25:26.964768912" | 5278 | "2023-11-09 04:25:26.964768912" | 18465
+ "2023-11-09 04:25:36.966422345" | 7146 | "2023-11-09 04:25:36.966422345" | 24423
+ "2023-11-09 04:25:46.969640057" | 8032 | "2023-11-09 04:25:46.969640057" | 25370
+ "2023-11-09 04:25:57.589902294" | 8868 | "2023-11-09 04:25:57.589902294" | 26277
+ "2023-11-09 04:26:07.590262491" | 13120 | "2023-11-09 04:26:07.590262491" | 30225
+ "2023-11-09 04:26:17.592895364" | 14584 | "2023-11-09 04:26:17.592895364" | 31501
+ "2023-11-09 04:26:27.594820340" | 15344 | "2023-11-09 04:26:27.594820340" | 32211
+
+Metadata
+ Query ID: a13f3abf-9c57-4caa-bfc6-c1b26732d2ad
+ Result rows: 10
+ Time: 124.670777ms
+ Read: 45858 rows (3331596 bytes)
+
+```
+
+Note that these happen to have exactly the same timestamp, based on how they
+were generated, but that need not be the case.
+
+## Warnings and caveats
+
+First, this is a **prototype**. It is designed for testing and experimentation,
+and no part of the product should expect this to work on customer sites any
+time soon.
+
+Second, the abstraction here is pretty leaky. SQL expressions that might
+normally work against a real table can easily fail here. Please file a bug if
+you think something _should_ work.
+
+Last, and maybe most important, be aware of the resource limitations here. This
+all works by constructing an _enormous_ joined table in memory. ClickHouse is
+extremely fast, but it is relatively simplistic when it comes to query planning
+and optimization. That means it will do exactly what the query says, including
+trying to create tables much larger than the available memory.
+
+For the most part, the blast radius of those problems should be limited to the
+ClickHouse zone itself. We also limit the total memory consumption of the
+server, currently to 90% of the zone's memory. But since we don't limit the
+_zone's_ memory, that's 90% of the physical memory, which is very large indeed.
+If you're curious how a query will perform, it's probably a good idea to try it
+out on a small subset of data, by adding a `LIMIT` clause or similar. You can
+also run `oxdb sql --transform $QUERY_STRING` to print the full query that will
+actually be executed on the server.
diff --git a/oximeter/db/src/bin/oxdb.rs b/oximeter/db/src/bin/oxdb.rs
index e14fdeb6a8..17f05c24e2 100644
--- a/oximeter/db/src/bin/oxdb.rs
+++ b/oximeter/db/src/bin/oxdb.rs
@@ -3,16 +3,27 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 //! Tool for developing against the Oximeter timeseries database, populating data and querying.
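+//!
+//! For example, after starting the shell with `oxdb --address <ADDR> sql`
+//! (the address here is a placeholder), one can type queries such as:
+//!
+//! ```sql
+//! SELECT count() FROM physical_data_link:bytes_received;
+//! ```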
-// Copyright 2021 Oxide Computer Company
+
+// Copyright 2023 Oxide Computer Company
 
 use anyhow::{bail, Context};
 use chrono::{DateTime, Utc};
 use clap::{Args, Parser};
+use dropshot::EmptyScanParams;
+use dropshot::WhichPage;
 use oximeter::{
     types::{Cumulative, Sample},
     Metric, Target,
 };
+use oximeter_db::sql::function_allow_list;
+use oximeter_db::QueryMetadata;
+use oximeter_db::QueryResult;
+use oximeter_db::Table;
 use oximeter_db::{query, Client, DbWrite};
+use reedline::DefaultPrompt;
+use reedline::DefaultPromptSegment;
+use reedline::Reedline;
+use reedline::Signal;
 use slog::{debug, info, o, Drain, Level, Logger};
 use std::net::IpAddr;
 use std::net::SocketAddr;
@@ -138,6 +149,12 @@ enum Subcommand {
         #[clap(long, conflicts_with("end"), action)]
         end_exclusive: Option<DateTime<Utc>>,
     },
+
+    /// Enter a SQL shell for interactive querying.
+    Sql {
+        #[clap(flatten)]
+        opts: ShellOptions,
+    },
 }
 
 async fn make_client(
@@ -295,8 +312,285 @@ async fn query(
     Ok(())
 }
 
+fn print_basic_commands() {
+    println!("Basic commands:");
+    println!(" \\?, \\h, help - Print this help");
+    println!(" \\q, quit, exit, ^D - Exit the shell");
+    println!(" \\l - List tables");
+    println!(" \\d <table> - Describe a table");
+    println!(
+        " \\f <function> - List or describe ClickHouse SQL functions"
+    );
+    println!();
+    println!("Or try entering a SQL `SELECT` statement");
+}
+
+async fn list_virtual_tables(client: &Client) -> anyhow::Result<()> {
+    let mut page = WhichPage::First(EmptyScanParams {});
+    let limit = 100.try_into().unwrap();
+    loop {
+        let results = client.timeseries_schema_list(&page, limit).await?;
+        for schema in results.items.iter() {
+            println!("{}", schema.timeseries_name);
+        }
+        if results.next_page.is_some() {
+            if let Some(last) = results.items.last() {
+                page = WhichPage::Next(last.timeseries_name.clone());
+            } else {
+                return Ok(());
+            }
+        } else {
+            return Ok(());
+        }
+    }
+}
+
+async fn describe_virtual_table(
+    client: &Client,
+    table: &str,
+) -> anyhow::Result<()> {
+    match table.parse() {
+        Err(_) => println!("Invalid timeseries name: {table}"),
+        Ok(name) => {
+            if let Some(schema) = client.schema_for_timeseries(&name).await? {
+                let mut cols =
+                    Vec::with_capacity(schema.field_schema.len() + 2);
+                let mut types = cols.clone();
+                for field in schema.field_schema.iter() {
+                    cols.push(field.name.clone());
+                    types.push(field.field_type.to_string());
+                }
+                cols.push("timestamp".into());
+                types.push("DateTime64".into());
+
+                if schema.datum_type.is_histogram() {
+                    cols.push("start_time".into());
+                    types.push("DateTime64".into());
+
+                    cols.push("bins".into());
+                    types.push(format!(
+                        "Array[{}]",
+                        schema
+                            .datum_type
+                            .to_string()
+                            .strip_prefix("Histogram")
+                            .unwrap()
+                            .to_lowercase(),
+                    ));
+
+                    cols.push("counts".into());
+                    types.push("Array[u64]".into());
+                } else if schema.datum_type.is_cumulative() {
+                    cols.push("start_time".into());
+                    types.push("DateTime64".into());
+                    cols.push("datum".into());
+                    types.push(schema.datum_type.to_string());
+                } else {
+                    cols.push("datum".into());
+                    types.push(schema.datum_type.to_string());
+                }
+
+                let mut builder = tabled::builder::Builder::default();
+                builder.set_header(cols);
+                builder.push_record(types);
+                println!(
+                    "{}",
+                    builder.build().with(tabled::settings::Style::psql())
+                );
+            } else {
+                println!("No such timeseries: {table}");
+            }
+        }
+    }
+    Ok(())
+}
+
+#[derive(Clone, Debug, Args)]
+struct ShellOptions {
+    /// Print query metadata.
+    #[clap(long = "metadata")]
+    print_metadata: bool,
+    /// Print the original SQL query.
+    #[clap(long = "original")]
+    print_original_query: bool,
+    /// Print the rewritten SQL query that is actually run on the DB.
+    #[clap(long = "rewritten")]
+    print_rewritten_query: bool,
+    /// Print the transformed query, but do not run it.
+    #[clap(long)]
+    transform: Option<String>,
+}
+
+impl Default for ShellOptions {
+    fn default() -> Self {
+        Self {
+            print_metadata: true,
+            print_original_query: false,
+            print_rewritten_query: false,
+            transform: None,
+        }
+    }
+}
+
+fn list_supported_functions() {
+    println!("Subset of ClickHouse SQL functions currently supported");
+    println!(
+        "See https://clickhouse.com/docs/en/sql-reference/functions for more"
+    );
+    println!();
+    for func in function_allow_list().iter() {
+        println!(" {func}");
+    }
+}
+
+fn show_supported_function(name: &str) {
+    if let Some(func) = function_allow_list().iter().find(|f| f.name == name) {
+        println!("{}", func.name);
+        println!(" {}", func.usage);
+        println!(" {}", func.description);
+    } else {
+        println!("No supported function '{name}'");
+    }
+}
+
+fn print_sql_query(query: &str) {
+    println!(
+        "{}",
+        sqlformat::format(
+            &query,
+            &sqlformat::QueryParams::None,
+            sqlformat::FormatOptions { uppercase: true, ..Default::default() }
+        )
+    );
+    println!();
+}
+
+fn print_query_metadata(table: &Table, metadata: &QueryMetadata) {
+    println!("Metadata");
+    println!(" Query ID: {}", metadata.id);
+    println!(" Result rows: {}", table.rows.len());
+    println!(" Time: {:?}", metadata.elapsed);
+    println!(" Read: {}\n", metadata.summary.read);
+}
+
+async fn sql_shell(
+    address: IpAddr,
+    port: u16,
+    log: Logger,
+    opts: ShellOptions,
+) -> anyhow::Result<()> {
+    let client = make_client(address, port, &log).await?;
+
+    // A workaround to ensure the client has all available timeseries when the
+    // shell starts.
+    let dummy = "foo:bar".parse().unwrap();
+    let _ = client.schema_for_timeseries(&dummy).await;
+
+    // Possibly just transform the query, but do not execute it.
+    if let Some(query) = &opts.transform {
+        let transformed = client.transform_query(query).await?;
+        println!(
+            "{}",
+            sqlformat::format(
+                &transformed,
+                &sqlformat::QueryParams::None,
+                sqlformat::FormatOptions {
+                    uppercase: true,
+                    ..Default::default()
+                }
+            )
+        );
+        return Ok(());
+    }
+
+    let mut ed = Reedline::create();
+    let prompt = DefaultPrompt::new(
+        DefaultPromptSegment::Basic("0x".to_string()),
+        DefaultPromptSegment::Empty,
+    );
+    println!("Oximeter SQL shell");
+    println!();
+    print_basic_commands();
+    loop {
+        let sig = ed.read_line(&prompt);
+        match sig {
+            Ok(Signal::Success(buf)) => {
+                let cmd = buf.as_str().trim();
+                match cmd {
+                    "" => continue,
+                    "\\?" | "\\h" | "help" => print_basic_commands(),
+                    "\\q" | "quit" | "exit" => return Ok(()),
+                    "\\l" | "\\d" => list_virtual_tables(&client).await?,
+                    _ => {
+                        if let Some(table_name) = cmd.strip_prefix("\\d") {
+                            if table_name.is_empty() {
+                                list_virtual_tables(&client).await?;
+                            } else {
+                                describe_virtual_table(
+                                    &client,
+                                    table_name.trim().trim_end_matches(';'),
+                                )
+                                .await?;
+                            }
+                        } else if let Some(func_name) = cmd.strip_prefix("\\f")
+                        {
+                            if func_name.is_empty() {
+                                list_supported_functions();
+                            } else {
+                                show_supported_function(
+                                    func_name.trim().trim_end_matches(';'),
+                                );
+                            }
+                        } else {
+                            match client.query(&buf).await {
+                                Err(e) => println!("Query failed: {e:#?}"),
+                                Ok(QueryResult {
+                                    original_query,
+                                    rewritten_query,
+                                    metadata,
+                                    table,
+                                }) => {
+                                    println!();
+                                    let mut builder =
+                                        tabled::builder::Builder::default();
+                                    builder.set_header(&table.column_names);
+                                    for row in table.rows.iter() {
+                                        builder.push_record(
+                                            row.iter().map(ToString::to_string),
+                                        );
+                                    }
+                                    if opts.print_original_query {
+                                        print_sql_query(&original_query);
+                                    }
+                                    if opts.print_rewritten_query {
+                                        print_sql_query(&rewritten_query);
+                                    }
+                                    println!(
+                                        "{}\n",
+                                        builder.build().with(
+                                            tabled::settings::Style::psql()
+                                        )
+                                    );
+                                    if opts.print_metadata {
+                                        print_query_metadata(&table, &metadata);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            Ok(Signal::CtrlD) => return Ok(()),
+            Ok(Signal::CtrlC) => continue,
+            err => println!("err: {err:?}"),
+        }
+    }
+}
+
 #[tokio::main]
-async fn main() {
+async fn main() -> anyhow::Result<()> {
+    usdt::register_probes().context("Failed to register USDT probes")?;
+
     let args = OxDb::parse();
     let decorator = slog_term::TermDecorator::new().build();
     let drain = slog_term::FullFormat::new(decorator)
@@ -308,12 +602,10 @@ async fn main() {
     match args.cmd {
         Subcommand::Describe => describe_data(),
         Subcommand::Populate { populate_args } => {
-            populate(args.address, args.port, log, populate_args)
-                .await
-                .unwrap();
+            populate(args.address, args.port, log, populate_args).await?
         }
         Subcommand::Wipe => {
-            wipe_single_node_db(args.address, args.port, log).await.unwrap()
+            wipe_single_node_db(args.address, args.port, log).await?
         }
         Subcommand::Query {
             timeseries_name,
@@ -342,8 +634,11 @@ async fn main() {
             start,
             end,
         )
-        .await
-        .unwrap();
+        .await?;
+        }
+        Subcommand::Sql { opts } => {
+            sql_shell(args.address, args.port, log, opts).await?
         }
     }
+    Ok(())
 }
diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs
index 299af33fb7..d6ec01d9fc 100644
--- a/oximeter/db/src/client.rs
+++ b/oximeter/db/src/client.rs
@@ -8,6 +8,7 @@
 use crate::model;
 use crate::query;
+use crate::sql::RestrictedQuery;
 use crate::Error;
 use crate::Metric;
 use crate::Target;
@@ -22,9 +23,11 @@ use dropshot::EmptyScanParams;
 use dropshot::PaginationOrder;
 use dropshot::ResultsPage;
 use dropshot::WhichPage;
+use indexmap::IndexMap;
 use oximeter::types::Sample;
 use regex::Regex;
 use regex::RegexBuilder;
+use reqwest::header::HeaderMap;
 use slog::debug;
 use slog::error;
 use slog::info;
@@ -41,6 +44,8 @@ use std::ops::Bound;
 use std::path::Path;
 use std::path::PathBuf;
 use std::sync::OnceLock;
+use std::time::Duration;
+use std::time::Instant;
 use tokio::fs;
 use tokio::sync::Mutex;
 use uuid::Uuid;
@@ -51,6 +56,137 @@
 mod probes {
     fn query__done(_: &usdt::UniqueId) {}
 }
 
+/// A count of bytes / rows accessed during a query.
+#[derive(Clone, Copy, Debug)]
+pub struct IoCount {
+    pub bytes: u64,
+    pub rows: u64,
+}
+
+impl std::fmt::Display for IoCount {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{} rows ({} bytes)", self.rows, self.bytes)
+    }
+}
+
+/// Summary of the I/O and duration of a query.
+#[derive(Clone, Copy, Debug, serde::Deserialize)]
+#[serde(try_from = "serde_json::Value")]
+pub struct QuerySummary {
+    /// The bytes and rows read by the query.
+    pub read: IoCount,
+    /// The bytes and rows written by the query.
+    pub written: IoCount,
+}
+
+impl TryFrom<serde_json::Value> for QuerySummary {
+    type Error = Error;
+
+    fn try_from(j: serde_json::Value) -> Result<Self, Self::Error> {
+        use serde_json::Map;
+        use serde_json::Value;
+        use std::str::FromStr;
+
+        let Value::Object(map) = j else {
+            return Err(Error::Database(String::from(
+                "Expected a JSON object for a metadata summary",
+            )));
+        };
+
+        fn unpack_summary_value<T>(
+            map: &Map<String, Value>,
+            key: &str,
+        ) -> Result<T, Error>
+        where
+            T: FromStr,
+            <T as FromStr>::Err: std::error::Error,
+        {
+            let value = map.get(key).ok_or_else(|| {
+                Error::MissingHeaderKey { key: key.to_string() }
+            })?;
+            let Value::String(v) = value else {
+                return Err(Error::BadMetadata {
+                    key: key.to_string(),
+                    msg: String::from("Expected a string value"),
+                });
+            };
+            v.parse::<T>().map_err(|e| Error::BadMetadata {
+                key: key.to_string(),
+                msg: e.to_string(),
+            })
+        }
+        let rows_read: u64 = unpack_summary_value(&map, "read_rows")?;
+        let bytes_read: u64 = unpack_summary_value(&map, "read_bytes")?;
+        let rows_written: u64 = unpack_summary_value(&map, "written_rows")?;
+        let bytes_written: u64 = unpack_summary_value(&map, "written_bytes")?;
+        Ok(Self {
+            read: IoCount { bytes: bytes_read, rows: rows_read },
+            written: IoCount { bytes: bytes_written, rows: rows_written },
+        })
+    }
+}
+
+/// Basic metadata about the resource usage of a single SQL query.
+#[derive(Clone, Copy, Debug)]
+pub struct QueryMetadata {
+    /// The database-assigned query ID.
+    pub id: Uuid,
+    /// The total duration of the query (network plus execution).
+    pub elapsed: Duration,
+    /// Summary of the data read and written.
+    pub summary: QuerySummary,
+}
+
+impl QueryMetadata {
+    fn from_headers(
+        elapsed: Duration,
+        headers: &HeaderMap,
+    ) -> Result<Self, Error> {
+        fn get_header<'a>(
+            map: &'a HeaderMap,
+            key: &'a str,
+        ) -> Result<&'a str, Error> {
+            let hdr = map.get(key).ok_or_else(|| Error::MissingHeaderKey {
+                key: key.to_string(),
+            })?;
+            std::str::from_utf8(hdr.as_bytes())
+                .map_err(|err| Error::Database(err.to_string()))
+        }
+        let summary =
+            serde_json::from_str(get_header(headers, "X-ClickHouse-Summary")?)
+                .map_err(|err| Error::Database(err.to_string()))?;
+        let id = get_header(headers, "X-ClickHouse-Query-Id")?
+            .parse()
+            .map_err(|err: uuid::Error| Error::Database(err.to_string()))?;
+        Ok(Self { id, elapsed, summary })
+    }
+}
+
+/// A tabular result from a SQL query against a timeseries.
+#[derive(Clone, Debug, Default, serde::Serialize)]
+pub struct Table {
+    /// The name of each column in the result set.
+    pub column_names: Vec<String>,
+    /// The rows of the result set, each with one element per column.
+    pub rows: Vec<Vec<serde_json::Value>>,
+}
+
+/// The full result of running a SQL query against a timeseries.
+#[derive(Clone, Debug)]
+pub struct QueryResult {
+    /// The query as written by the client.
+    pub original_query: String,
+    /// The rewritten query, run against the JOINed representation of the
+    /// timeseries.
+    ///
+    /// This is the query that is actually run in the database itself.
+    pub rewritten_query: String,
+    /// Metadata about the resource usage of the query.
+    pub metadata: QueryMetadata,
+    /// The result of the query, with column names and rows.
+    pub table: Table,
+}
+
 /// A `Client` to the ClickHouse metrics database.
 #[derive(Debug)]
 pub struct Client {
@@ -89,6 +225,76 @@ impl Client {
         Ok(())
     }
 
+    /// Transform a SQL query against a timeseries, but do not execute it.
+    pub async fn transform_query(
+        &self,
+        query: impl AsRef<str>,
+    ) -> Result<String, Error> {
+        let restricted = RestrictedQuery::new(query.as_ref())?;
+        restricted.to_oximeter_sql(&*self.schema.lock().await)
+    }
+
+    /// Run a SQL query against a timeseries.
+    pub async fn query(
+        &self,
+        query: impl AsRef<str>,
+    ) -> Result<QueryResult, Error> {
+        let original_query = query.as_ref().trim_end_matches(';');
+        let ox_sql = self.transform_query(original_query).await?;
+        let rewritten = format!("{ox_sql} FORMAT JSONEachRow");
+        debug!(
+            self.log,
+            "rewrote restricted query";
+            "original_sql" => &original_query,
+            "rewritten_sql" => &rewritten,
+        );
+        let request = self
+            .client
+            .post(&self.url)
+            .query(&[
+                ("output_format_json_quote_64bit_integers", "0"),
+                ("database", crate::DATABASE_NAME),
+            ])
+            .body(rewritten.clone());
+        let query_start = Instant::now();
+        let response = handle_db_response(
+            request
+                .send()
+                .await
+                .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?,
+        )
+        .await?;
+        let metadata = QueryMetadata::from_headers(
+            query_start.elapsed(),
+            response.headers(),
+        )?;
+        let text = response.text().await.unwrap();
+        let mut table = Table::default();
+        for line in text.lines() {
+            let row =
+                serde_json::from_str::<IndexMap<String, serde_json::Value>>(
+                    line.trim(),
+                )
+                .unwrap();
+            if table.column_names.is_empty() {
+                table.column_names.extend(row.keys().cloned())
+            } else {
+                assert!(table
+                    .column_names
+                    .iter()
+                    .zip(row.keys())
+                    .all(|(k1, k2)| k1 == k2));
+            }
+            table.rows.push(row.into_values().collect());
+        }
+        Ok(QueryResult {
+            original_query: original_query.to_string(),
+            rewritten_query: rewritten,
+            metadata,
+            table,
+        })
+    }
+
     /// Select timeseries from criteria on the fields and start/end timestamps.
     pub async fn select_timeseries_with(
         &self,
@@ -271,7 +477,7 @@ impl Client {
         ResultsPage::new(schema, &dropshot::EmptyScanParams {}, |schema, _| {
             schema.timeseries_name.clone()
         })
-        .map_err(|e| Error::Database(e.to_string()))
+        .map_err(|err| Error::Database(err.to_string()))
     }
 
     /// Read the available schema versions in the provided directory.
@@ -1181,13 +1387,14 @@ async fn handle_db_response(
         // NOTE: ClickHouse returns 404 for all errors (so far encountered). We pull the text from
         // the body if possible, which contains the actual error from the database.
         let body = response.text().await.unwrap_or_else(|e| e.to_string());
-        Err(Error::Database(body))
+        Err(Error::Database(format!("Query failed: {body}")))
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::model::OXIMETER_VERSION;
     use crate::query;
     use crate::query::field_table_name;
     use bytes::Bytes;
@@ -4267,4 +4474,87 @@ mod tests {
         db.cleanup().await.expect("Failed to cleanup ClickHouse server");
         logctx.cleanup_successful();
     }
+
+    #[tokio::test]
+    async fn test_sql_query_output() {
+        let logctx = test_setup_log("test_sql_query_output");
+        let log = &logctx.log;
+        let mut db = ClickHouseInstance::new_single_node(0)
+            .await
+            .expect("Failed to start ClickHouse");
+        let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port());
+        let client = Client::new(address, &log);
+        client
+            .initialize_db_with_version(false, OXIMETER_VERSION)
+            .await
+            .expect("Failed to initialize timeseries database");
+        let (_target, metrics, samples) = setup_select_test();
+        client.insert_samples(&samples).await.unwrap();
+
+        // Sanity check that we get exactly the number of samples we expected.
+        let res = client
+            .query("SELECT count() AS total FROM service:request_latency")
+            .await
+            .unwrap();
+        assert_eq!(res.table.rows.len(), 1);
+        let serde_json::Value::Number(n) = &res.table.rows[0][0] else {
+            panic!("Expected exactly 1 row with 1 item");
+        };
+        assert_eq!(n.as_u64().unwrap(), samples.len() as u64);
+
+        // Assert grouping by the keys results in exactly the number of samples
+        // expected for each timeseries.
+        let res = client
+            .query(
+                "SELECT count() AS total \
+                FROM service:request_latency \
+                GROUP BY timeseries_key; \
+                ",
+            )
+            .await
+            .unwrap();
+        assert_eq!(res.table.rows.len(), metrics.len());
+        for row in res.table.rows.iter() {
+            assert_eq!(row.len(), 1);
+            let serde_json::Value::Number(n) = &row[0] else {
+                panic!("Expected a number in each row");
+            };
+            assert_eq!(
+                n.as_u64().unwrap(),
+                (samples.len() / metrics.len()) as u64
+            );
+        }
+
+        // Read test SQL and make sure we're getting expected results.
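+        // Each subdirectory of `test-output/sql/` is expected to contain a
+        // `query.sql` with the query to run and a `result.txt` with the
+        // expected table, verified below via `expectorate`.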
+        let sql_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("test-output")
+            .join("sql");
+        let mut rd = tokio::fs::read_dir(&sql_dir)
+            .await
+            .expect("failed to read SQL test directory");
+        while let Some(next_entry) =
+            rd.next_entry().await.expect("failed to read directory entry")
+        {
+            let sql_file = next_entry.path().join("query.sql");
+            let result_file = next_entry.path().join("result.txt");
+            let query = tokio::fs::read_to_string(&sql_file)
+                .await
+                .unwrap_or_else(|_| {
+                    panic!(
+                        "failed to read test SQL query in '{}'",
+                        sql_file.display()
+                    )
+                });
+            let res = client
+                .query(&query)
+                .await
+                .expect("failed to execute test query");
+            expectorate::assert_contents(
+                result_file,
+                &serde_json::to_string_pretty(&res.table).unwrap(),
+            );
+        }
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
 }
diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs
index 9029319048..24f7d8c2d0 100644
--- a/oximeter/db/src/lib.rs
+++ b/oximeter/db/src/lib.rs
@@ -33,9 +33,13 @@ use thiserror::Error;
 mod client;
 pub mod model;
 pub mod query;
+pub mod sql;
+
 pub use client::Client;
 pub use client::DbWrite;
-
+pub use client::QueryMetadata;
+pub use client::QueryResult;
+pub use client::Table;
 pub use model::OXIMETER_VERSION;
 
 #[derive(Debug, Error)]
@@ -47,8 +51,14 @@ pub enum Error {
     #[error("Telemetry database unavailable: {0}")]
     DatabaseUnavailable(String),
 
+    #[error("Missing expected metadata header key '{key}'")]
+    MissingHeaderKey { key: String },
+
+    #[error("Invalid or malformed query metadata for key '{key}': {msg}")]
+    BadMetadata { key: String, msg: String },
+
     /// An error interacting with the telemetry database
-    #[error("Error interacting with telemetry database: {0}")]
+    #[error("Error interacting with telemetry database")]
     Database(String),
 
     /// A schema provided when collecting samples did not match the expected schema
@@ -123,6 +133,9 @@ pub enum Error {
 
     #[error("Schema update versions must be sequential without gaps")]
     NonSequentialSchemaVersions,
+
+    #[error("SQL error")]
+    Sql(#[from] sql::Error),
 }
 
 impl From<model::DbTimeseriesSchema> for TimeseriesSchema {
@@ -268,30 +281,8 @@ mod tests {
     use super::*;
     use crate::model::DbFieldList;
     use crate::model::DbTimeseriesSchema;
-    use std::convert::TryFrom;
     use uuid::Uuid;
 
-    #[test]
-    fn test_timeseries_name() {
-        let name = TimeseriesName::try_from("foo:bar").unwrap();
-        assert_eq!(format!("{}", name), "foo:bar");
-    }
-
-    #[test]
-    fn test_timeseries_name_from_str() {
-        assert!(TimeseriesName::try_from("a:b").is_ok());
-        assert!(TimeseriesName::try_from("a_a:b_b").is_ok());
-        assert!(TimeseriesName::try_from("a0:b0").is_ok());
-        assert!(TimeseriesName::try_from("a_0:b_0").is_ok());
-
-        assert!(TimeseriesName::try_from("_:b").is_err());
-        assert!(TimeseriesName::try_from("a_:b").is_err());
-        assert!(TimeseriesName::try_from("0:b").is_err());
-        assert!(TimeseriesName::try_from(":b").is_err());
-        assert!(TimeseriesName::try_from("a:").is_err());
-        assert!(TimeseriesName::try_from("123").is_err());
-    }
-
     // Validates that the timeseries_key stability for a sample is stable.
     #[test]
     fn test_timeseries_key_sample_stability() {
diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs
index 2caefb24c3..9212769573 100644
--- a/oximeter/db/src/query.rs
+++ b/oximeter/db/src/query.rs
@@ -296,6 +296,7 @@ impl SelectQueryBuilder {
     }
 }
 
+/// Return the name of the measurements table for a datum type.
 pub(crate) fn measurement_table_name(ty: DatumType) -> String {
     format!("measurements_{}", ty.to_string().to_lowercase())
 }
@@ -335,6 +336,7 @@ pub struct FieldSelector {
     comparison: Option<FieldComparison>,
 }
 
+/// Return the name of the field table for the provided field type.
 pub(crate) fn field_table_name(ty: FieldType) -> String {
     format!("fields_{}", ty.to_string().to_lowercase())
 }
diff --git a/oximeter/db/src/sql/mod.rs b/oximeter/db/src/sql/mod.rs
new file mode 100644
index 0000000000..1f84e208d2
--- /dev/null
+++ b/oximeter/db/src/sql/mod.rs
@@ -0,0 +1,1358 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Run SQL queries against the timeseries database.
+//!
+//! # Overview
+//!
+//! `oximeter` collects and stores samples from timeseries. The schema for those
+//! samples is defined by applications, using the [`Target`](oximeter::Target)
+//! and [`Metric`](oximeter::Metric) traits. Samples from these timeseries are
+//! not stored in explicit tables, however. They are "unrolled" into the fields
+//! and measurements, which are stored in a table based on their _data type_.
+//! For example, `String` fields are stored in the `oximeter.fields_string`
+//! table. (See RFD 161 for more details.)
+//!
+//! This arrangement is flexible and simple, since we can statically define the
+//! tables we need, rather than, say, creating a new table for each timeseries
+//! schema. However, the drawback of this is that the timeseries data is not
+//! easily queried directly. The data is split across many tables, and
+//! interleaved with other timeseries, which may not even share a schema.
+//!
+//! The tools in this module are for making "normal" SQL queries transparently
+//! act on the "virtual tables" that are implied by each timeseries. It's
+//! effectively a SQL-to-SQL transpiler, converting queries against the
+//! timeseries into one or more queries against the actual tables in ClickHouse.
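+//!
+//! As a rough sketch (illustrative only, not the exact generated SQL), a query
+//! such as:
+//!
+//! ```sql
+//! SELECT count() FROM physical_data_link:bytes_received;
+//! ```
+//!
+//! is rewritten to first reconstruct the virtual table as a CTE, by JOINing
+//! the relevant field tables and the measurements table on their timeseries
+//! keys:
+//!
+//! ```sql
+//! WITH "physical_data_link:bytes_received" AS (
+//!     SELECT
+//!         filter_on_link_name.timeseries_key AS timeseries_key,
+//!         filter_on_link_name.field_value AS link_name,
+//!         -- ... one subquery per remaining field ...
+//!         measurements.start_time AS start_time,
+//!         measurements.timestamp AS timestamp,
+//!         measurements.datum AS datum
+//!     FROM (
+//!         SELECT DISTINCT timeseries_key, field_value
+//!         FROM fields_string
+//!         WHERE timeseries_name = 'physical_data_link:bytes_received'
+//!         AND field_name = 'link_name'
+//!     ) AS filter_on_link_name
+//!     JOIN (
+//!         SELECT timeseries_key, start_time, timestamp, datum
+//!         FROM measurements_cumulativeu64
+//!         WHERE timeseries_name = 'physical_data_link:bytes_received'
+//!     ) AS measurements
+//!     ON filter_on_link_name.timeseries_key = measurements.timeseries_key
+//! )
+//! SELECT count() FROM "physical_data_link:bytes_received";
+//! ```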
+
+// Copyright 2023 Oxide Computer Company
+
+use crate::query::field_table_name;
+use crate::query::measurement_table_name;
+use crate::DatumType;
+use crate::Error as OxdbError;
+use crate::FieldType;
+use crate::TimeseriesName;
+use crate::TimeseriesSchema;
+use indexmap::IndexSet;
+use oximeter::MetricsError;
+use sqlparser::ast::BinaryOperator;
+use sqlparser::ast::Cte;
+use sqlparser::ast::Distinct;
+use sqlparser::ast::Expr;
+use sqlparser::ast::Ident;
+use sqlparser::ast::Join;
+use sqlparser::ast::JoinConstraint;
+use sqlparser::ast::JoinOperator;
+use sqlparser::ast::ObjectName;
+use sqlparser::ast::OrderByExpr;
+use sqlparser::ast::Query;
+use sqlparser::ast::Select;
+use sqlparser::ast::SelectItem;
+use sqlparser::ast::SetExpr;
+use sqlparser::ast::Statement;
+use sqlparser::ast::TableAlias;
+use sqlparser::ast::TableFactor;
+use sqlparser::ast::TableWithJoins;
+use sqlparser::ast::Value;
+use sqlparser::ast::With;
+use sqlparser::dialect::AnsiDialect;
+use sqlparser::dialect::Dialect;
+use sqlparser::parser::Parser;
+use sqlparser::parser::ParserError;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+use std::ops::ControlFlow;
+use std::sync::OnceLock;
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("SQL parsing error")]
+    Parser(#[from] ParserError),
+
+    #[error("Unsupported SQL: {0}")]
+    UnsupportedSql(&'static str),
+
+    #[error("Unsupported function: '{func}'")]
+    UnsupportedFunction { func: String },
+
+    #[error("Invalid column '{name}' for timeseries '{timeseries_name}'")]
+    InvalidColumn { name: String, timeseries_name: String },
+
+    #[error(
+        "Table name '{table_name}' in select query does not match \
+        timeseries name '{timeseries_name}'"
+    )]
+    TableInSelectIsNotTimeseries { table_name: String, timeseries_name: String },
+
+    #[error("Invalid timeseries name: '{name}'")]
+    InvalidTimeseriesName { name: String },
+}
+
+/// The oximeter timeseries SQL dialect.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct OxdbDialect;
+
+impl Dialect for OxdbDialect {
+    fn is_identifier_start(&self, ch: char) -> bool {
+        AnsiDialect {}.is_identifier_start(ch)
+    }
+
+    fn is_identifier_part(&self, ch: char) -> bool {
+        AnsiDialect {}.is_identifier_part(ch) || ch == ':'
+    }
+}
+
+/// A SQL statement that is probably supported.
+///
+/// There's a big range of statements that are not supported. This is guaranteed
+/// to be a single select statement, where all the items being selected FROM
+/// are:
+///
+/// - concrete tables that could be timeseries (valid names)
+/// - a subquery against a restricted query
+#[derive(Clone, Debug)]
+pub struct RestrictedQuery {
+    safe_sql: SafeSql,
+    query: Query,
+    timeseries: IndexSet<TimeseriesName>,
+}
+
+impl std::fmt::Display for RestrictedQuery {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}", self.query)
+    }
+}
+
+macro_rules! unsupported {
+    ($msg:literal) => {
+        Err(OxdbError::from(Error::UnsupportedSql($msg)))
+    };
+}
+
+/// A helper type to preprocess any ClickHouse-specific SQL, and present a
+/// known-safe version of it to the main `sqlparser` code.
+///
+/// This is currently used to handle ASOF JOINs, which are a ClickHouse-specific
+/// JOIN that joins rows based on a "closest match" condition. However, a
+/// standard SQL parser will take an expression like:
+///
+/// ```sql
+/// SELECT foo ASOF JOIN bar
+/// ```
+///
+/// And interpret the `ASOF` as an alias for `foo`, as if one had written
+///
+/// ```sql
+/// SELECT foo AS asof JOIN bar
+/// ```
+///
+/// This basically detects and removes a bare `ASOF` in that case, so the parser
+/// can run normally.
+#[derive(Clone, Debug)]
+struct SafeSql {
+    original: String,
+    safe: String,
+}
+
+impl SafeSql {
+    fn new(sql: impl AsRef<str>) -> Self {
+        // The regex crate doesn't support look-arounds, so we'll have to
+        // manually find sequences like `ASOF JOIN`, that are not preceded by
+        // `AS`.
+        let sql = sql.as_ref().trim().trim_end_matches(';');
+        let mut original = Vec::new();
+        let mut safe = Vec::new();
+        let mut tokens = sql.split_ascii_whitespace().peekable();
+        while let Some(token) = tokens.next() {
+            // Always push the current token.
+            if token.parse::<TimeseriesName>().is_ok() {
+                let tok = format!("\"{token}\"");
+                safe.push(tok.clone());
+                original.push(tok);
+            } else {
+                safe.push(token.to_string());
+                original.push(token.to_string());
+            }
+
+            // If the next token is ASOF, and the current is _not_ AS, then this
+            // is something like `select foo asof join bar`, and we want to chop
+            // out the `asof`. Consume the next token, and break the SQL string,
+            // by pushing a new chunk at the end.
+            if let Some(next_token) = tokens.peek() {
+                if !token.eq_ignore_ascii_case("as")
+                    && next_token.eq_ignore_ascii_case("asof")
+                {
+                    original.push(tokens.next().unwrap().to_string());
+                }
+            }
+        }
+        Self { original: original.join(" "), safe: safe.join(" ") }
+    }
+
+    fn safe_sql(&self) -> &str {
+        &self.safe
+    }
+}
+
+impl RestrictedQuery {
+    /// Construct a new restricted query.
+    pub fn new(sql: impl AsRef<str>) -> Result<Self, OxdbError> {
+        let safe_sql = SafeSql::new(sql);
+        let statements = Parser::parse_sql(&OxdbDialect, &safe_sql.safe_sql())
+            .map_err(Error::from)?;
+        if statements.len() != 1 {
+            return unsupported!("Only a single SQL statement is supported");
+        }
+
+        let statement = statements.into_iter().next().unwrap();
+        let Statement::Query(mut query) = statement else {
+            return unsupported!("Statement must be a SELECT query");
+        };
+
+        // Walk the AST before doing any real processing or transformation, and
+        // validate any function calls are on the allow-list.
+        let maybe_denied_function =
+            sqlparser::ast::visit_expressions(&query, |expr| {
+                if let Expr::Function(func) = expr {
+                    if let Some(name) = func.name.0.first() {
+                        if !function_allow_list()
+                            .iter()
+                            .any(|f| f.name == name.value.as_str())
+                        {
+                            return ControlFlow::Break(name.value.clone());
+                        }
+                    }
+                }
+                ControlFlow::Continue(())
+            });
+        if let ControlFlow::Break(func) = maybe_denied_function {
+            return Err(OxdbError::from(Error::UnsupportedFunction { func }));
+        };
+
+        let timeseries = Self::process_query(&mut query)?;
+        Ok(Self { safe_sql, query: *query, timeseries })
+    }
+
+    /// Convert the original SQL into a query specifically for the `oximeter`
+    /// timeseries table organization.
+    pub fn to_oximeter_sql(
+        &self,
+        timeseries_schema: &BTreeMap<TimeseriesName, TimeseriesSchema>,
+    ) -> Result<String, OxdbError> {
+        self.generate_timeseries_ctes(&timeseries_schema).map(|cte_tables| {
+            if cte_tables.is_empty() {
+                // The query didn't reference any timeseries at all, let's just
+                // return it
+                self.safe_sql.original.clone()
+            } else {
+                // There are some timeseries referenced. Let's return a query
+                // constructed by building the CTEs, and then the _original_
+                // SQL, which may have `ASOF JOIN`s in it.
+                format!(
+                    "{} {}",
+                    With { recursive: false, cte_tables },
+                    self.safe_sql.original,
+                )
+            }
+        })
+    }
+
+    // For each timeseries named in `self`, generate a CTE that creates the
+    // virtual table for that timeseries by joining all its component parts.
+    fn generate_timeseries_ctes(
+        &self,
+        timeseries_schema: &BTreeMap<TimeseriesName, TimeseriesSchema>,
+    ) -> Result<Vec<Cte>, OxdbError> {
+        let mut ctes = Vec::with_capacity(self.timeseries.len());
+        for timeseries in self.timeseries.iter() {
+            let schema =
+                timeseries_schema.get(timeseries).ok_or_else(|| {
+                    OxdbError::TimeseriesNotFound(
+                        timeseries.as_str().to_owned(),
+                    )
+                })?;
+            ctes.push(Self::build_timeseries_cte(schema));
+        }
+        Ok(ctes)
+    }
+
+    // Given a timeseries schema, return a CTE which generates the equivalent
+    // virtual table.
+    //
+    // As timeseries samples are ingested, we "unroll" them in various ways, and
+    // store them in a set of normalized tables. These contain the _fields_ (one
+    // table per field data type) and the measurements (one table per
+    // measurement data type). This method reverses that process, creating a
+    // single, virtual table that represents all samples from the timeseries
+    // (plural) of the same schema.
+    //
+    // It generates a CTE like so:
+    //
+    // ```sql
+    // WITH {timeseries_name} AS (
+    //     SELECT
+    //         timeseries_key,
+    //         filter_on_{field_name0}.field_value as {field_name0},
+    //         filter_on_{field_name1}.field_value as {field_name1},
+    //         ...
+    //         measurements.timestamp AS timestamp,
+    //         measurements.datum as datum,
+    //     FROM
+    //         (
+    //             SELECT DISTINCT timeseries_key,
+    //                 field_value
+    //             FROM
+    //                 fields_{field_type}
+    //             WHERE
+    //                 timeseries_name = '{timeseries_name}'
+    //                 AND field_name = '{field_name0}'
+    //         ) AS filter_on_{field_name0}
+    //         JOIN (
+    //             ... select next field table
+    //         ) AS filter_on_{field_name1} ON filter_on_{field_name0}.timeseries_key = filter_on_{field_name1}.timeseries_key
+    //         ...
+    //         JOIN (
+    //             SELECT
+    //                 timeseries_key,
+    //                 timestamp,
+    //                 datum,
+    //             FROM
+    //                 measurements_{datum_type}
+    //             WHERE
+    //                 timeseries_name = '{timeseries_name}'
+    //         ) AS measurements ON filter_on_fieldN.timeseries_key = measurements.timeseries_key
+    //     ORDER BY
+    //         timeseries_key,
+    //         timestamp
+    // )
+    // ```
+    //
+    // In other words, it should generate a CTE that one can query as if the
+    // timeseries itself were an actual table in the database, like:
+    //
+    // ```
+    // timeseries_key | field_name0 | field_name1 | ... | timestamp | datum
+    // ---------------+-------------+-------------+ ... +-----------+------
+    // key0           | field0_0    | field0_1    | ... | t0        | d0
+    // key0           | field0_0    | field0_1    | ... | t1        | d1
+    // key0           | field0_0    | field0_1    | ... | t2        | d2
+    // key0           | field0_0    | field0_1    | ... | t3        | d3
+    // ...
+    // key1           | field1_0    | field1_1    | ... | t0        | d0
+    // key1           | field1_0    | field1_1    | ... | t1        | d1
+    // key1           | field1_0    | field1_1    | ... | t2        | d2
+    // key1           | field1_0    | field1_1    | ... | t3        | d3
+    // ...
+    // ```
+    //
+    // In this case, all rows with `key0` are from the "first" timeseries with
+    // this schema. `fieldX_Y` indicates the Yth field from the timeseries with
+    // `keyX` as its key.
+    fn build_timeseries_cte(schema: &TimeseriesSchema) -> Cte {
+        // First build each query against the relevant field tables.
+        //
+        // These are the `SELECT DISTINCT ... FROM fields_{field_type}`
+        // subqueries above.
+        let mut field_queries = Vec::with_capacity(schema.field_schema.len());
+        for field_schema in schema.field_schema.iter() {
+            let field_query = Self::build_field_query(
+                &schema.timeseries_name,
+                &field_schema.name,
+                &field_schema.field_type,
+            );
+            field_queries.push((field_schema.name.as_str(), field_query));
+        }
+
+        // Generate the last measurement query, the last subquery in the main
+        // CTE.
+        let measurement_query = Self::build_measurement_query(
+            &schema.timeseries_name,
+            &schema.datum_type,
+        );
+
+        // The "top-level" columns are the columns outputted by the CTE itself.
+        //
+        // These are the aliased columns of the full, reconstructed table
+        // representing the timeseries. This makes the timeseries_key available,
+        // as well as each field aliased to the actual field name, and the
+        // measurements.
+        let mut top_level_projections =
+            Vec::with_capacity(field_queries.len() + 2);
+
+        // Create the projection of the top-level timeseries_key.
+        //
+        // This is taken from the first field, which always exists, since
+        // timeseries have at least one field. This creates the expression:
+        // `filter_{field_name}.timeseries_key AS timeseries_key`
+        let timeseries_key_projection = SelectItem::ExprWithAlias {
+            expr: Expr::CompoundIdentifier(vec![
+                Self::field_subquery_alias(field_queries[0].0),
+                Self::str_to_ident("timeseries_key"),
+            ]),
+            alias: Self::str_to_ident("timeseries_key"),
+        };
+        top_level_projections.push(timeseries_key_projection);
+
+        // We'll build a big `TableWithJoins` to express the entire JOIN
+        // operation between all fields and the measurements. This is the "meat"
+        // of the CTE for this timeseries, joining the constituent records into
+        // the virtual table for this schema.
+        //
+        // We select first from the subquery specifying the first field query.
+        let mut cte_from = TableWithJoins {
+            relation: TableFactor::Derived {
+                lateral: false,
+                subquery: Self::select_to_query(field_queries[0].1.clone()),
+                alias: Some(TableAlias {
+                    name: Self::field_subquery_alias(field_queries[0].0),
+                    columns: vec![],
+                }),
+            },
+            joins: Vec::with_capacity(field_queries.len()),
+        };
+
+        // For all field queries, create a projection for the field_value,
+        // aliased as the field name.
+        let field_queries: Vec<_> = field_queries.into_iter().collect();
+        for (i, (field_name, query)) in field_queries.iter().enumerate() {
+            // Select the field_value from this field query, renaming it to the
+            // actual field name.
+            let projection = SelectItem::ExprWithAlias {
+                expr: Expr::CompoundIdentifier(vec![
+                    Self::field_subquery_alias(field_name),
+                    Self::str_to_ident("field_value"),
+                ]),
+                alias: Self::str_to_ident(field_name),
+            };
+            top_level_projections.push(projection);
+
+            // We've inserted the first subquery as the `from.relation` field in
+            // the main CTE we're building. We need to skip that one, even
+            // though we added its aliased `field_value` column to the top level
+            // projections.
+            //
+            // Any additional field subqueries are inserted in the JOIN portion
+            // of the CTE.
+            if i == 0 {
+                continue;
+            }
+            let relation = TableFactor::Derived {
+                lateral: false,
+                subquery: Self::select_to_query(query.clone()),
+                alias: Some(TableAlias {
+                    name: Self::field_subquery_alias(field_name),
+                    columns: vec![],
+                }),
+            };
+
+            // The join is always INNER, and is on the timeseries_key only.
+            // ClickHouse does not support `USING <columns>` when using multiple
+            // JOINs simultaneously, so we always write this as an `ON`
+            // constraint, between the previous field subquery and this one.
+            //
+            // I.e., `ON filter_foo.timeseries_key = filter_bar.timeseries_key`
+            let last_field_name = &field_queries[i - 1].0;
+            let constraints = Expr::BinaryOp {
+                left: Box::new(Expr::CompoundIdentifier(vec![
+                    Self::field_subquery_alias(last_field_name),
+                    Self::str_to_ident("timeseries_key"),
+                ])),
+                op: BinaryOperator::Eq,
+                right: Box::new(Expr::CompoundIdentifier(vec![
+                    Self::field_subquery_alias(field_name),
+                    Self::str_to_ident("timeseries_key"),
+                ])),
+            };
+            let join_operator =
+                JoinOperator::Inner(JoinConstraint::On(constraints));
+            cte_from.joins.push(Join { relation, join_operator });
+        }
+
+        // Finally, we need to project and join in the measurements table.
+        let datum_columns = Self::datum_type_to_columns(&schema.datum_type);
+        for col in datum_columns.iter() {
+            let projection = SelectItem::ExprWithAlias {
+                expr: Expr::CompoundIdentifier(vec![
+                    Self::str_to_ident("measurements"),
+                    Self::str_to_ident(col),
+                ]),
+                alias: Self::str_to_ident(col),
+            };
+            top_level_projections.push(projection);
+        }
+        let relation = TableFactor::Derived {
+            lateral: false,
+            subquery: Self::select_to_query(measurement_query),
+            alias: Some(TableAlias {
+                name: Self::str_to_ident("measurements"),
+                columns: vec![],
+            }),
+        };
+        let constraints = Expr::BinaryOp {
+            left: Box::new(Expr::CompoundIdentifier(vec![
+                Self::field_subquery_alias(
+                    &schema.field_schema.last().unwrap().name,
+                ),
+                Self::str_to_ident("timeseries_key"),
+            ])),
+            op: BinaryOperator::Eq,
+            right: Box::new(Expr::CompoundIdentifier(vec![
+                Self::str_to_ident("measurements"),
+                Self::str_to_ident("timeseries_key"),
+            ])),
+        };
+        let join_operator =
+            JoinOperator::Inner(JoinConstraint::On(constraints));
+        cte_from.joins.push(Join { relation, join_operator });
+
+        // To build the real virtual table for all the timeseries, we really
+        // need to sort the samples as if they were inserted into the table
+        // itself. ClickHouse partitions the tables dynamically since we're
+        // using a MergeTree engine, which groups and repacks rows in the
+        // background.
+        //
+        // We'll impose a consistent sorting order here. If one does not include
+        // this, results are inconsistent, since the different data parts of the
+        // measurements tables are not read in order every time.
+        let order_by = top_level_projections
+            .iter()
+            .filter_map(|proj| {
+                let SelectItem::ExprWithAlias { alias, .. } = &proj else {
+                    unreachable!();
+                };
+                if alias.value == "timeseries_key"
+                    || alias.value == "start_time"
+                    || alias.value == "timestamp"
+                {
+                    Some(OrderByExpr {
+                        expr: Expr::Identifier(alias.clone()),
+                        asc: None,
+                        nulls_first: None,
+                    })
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        // We now have all the subqueries joined together, plus the columns
+        // we're projecting from that join result. We need to build the final
+        // CTE that represents the full virtual timeseries table.
+        let alias = TableAlias {
+            name: Ident {
+                value: schema.timeseries_name.to_string(),
+                quote_style: Some('"'),
+            },
+            columns: vec![],
+        };
+        let top_level_select = Select {
+            distinct: None,
+            top: None,
+            projection: top_level_projections,
+            into: None,
+            from: vec![cte_from],
+            lateral_views: vec![],
+            selection: None,
+            group_by: vec![],
+            cluster_by: vec![],
+            distribute_by: vec![],
+            sort_by: vec![],
+            having: None,
+            named_window: vec![],
+            qualify: None,
+        };
+        let mut query = Self::select_to_query(top_level_select);
+        query.order_by = order_by;
+        Cte { alias, query, from: None }
+    }
+
+    // Create a SQL parser `Ident` with the given name.
+    fn str_to_ident(s: &str) -> Ident {
+        Ident { value: s.to_string(), quote_style: None }
+    }
+
+    // Return an `Ident` alias for a subquery of a specific field table.
+    //
+    // E.g., the `filter_on_foo` in `(SELECT DISTINCT ... ) AS filter_on_foo`.
+    fn field_subquery_alias(field_name: &str) -> Ident {
+        Self::str_to_ident(format!("filter_on_{field_name}").as_str())
+    }
+
+    // Return the required measurement columns for a specific datum type.
+    //
+    // Scalar measurements have only a timestamp and datum. Cumulative counters
+    // have those plus a start_time. And histograms have those plus the bins.
+    fn datum_type_to_columns(
+        datum_type: &DatumType,
+    ) -> &'static [&'static str] {
+        if datum_type.is_histogram() {
+            &["start_time", "timestamp", "bins", "counts"]
+        } else if datum_type.is_cumulative() {
+            &["start_time", "timestamp", "datum"]
+        } else {
+            &["timestamp", "datum"]
+        }
+    }
+
+    fn select_to_query(select: Select) -> Box<Query> {
+        Box::new(Query {
+            with: None,
+            body: Box::new(SetExpr::Select(Box::new(select))),
+            order_by: vec![],
+            limit: None,
+            offset: None,
+            fetch: None,
+            locks: vec![],
+        })
+    }
+
+    // Build a single subquery which selects the unique fields with the provided
+    // name. E.g., this creates:
+    //
+    // ```sql
+    // SELECT DISTINCT timeseries_key,
+    //     field_value
+    // FROM
+    //     fields_{field_type}
+    // WHERE
+    //     timeseries_name = '{timeseries_name}'
+    //     AND field_name = '{field_name}'
+    // ```
+    fn build_field_query(
+        timeseries_name: &TimeseriesName,
+        field_name: &str,
+        field_type: &FieldType,
+    ) -> Select {
+        // FROM fields_{field_type}
+        let from = TableWithJoins {
+            relation: TableFactor::Table {
+                name: ObjectName(vec![Self::str_to_ident(&field_table_name(
+                    *field_type,
+                ))]),
+                alias: None,
+                args: None,
+                with_hints: vec![],
+            },
+            joins: vec![],
+        };
+
+        // SELECT timeseries_key, field_value
+        let projection = vec![
+            SelectItem::UnnamedExpr(Expr::Identifier(Self::str_to_ident(
+                "timeseries_key",
+            ))),
+            SelectItem::UnnamedExpr(Expr::Identifier(Self::str_to_ident(
+                "field_value",
+            ))),
+        ];
+
+        // WHERE timeseries_name = '{timeseries_name}' AND field_name = '{field_name}'
+        let selection = Some(Expr::BinaryOp {
+            left: Box::new(Expr::BinaryOp {
+                left: Box::new(Expr::Identifier(Self::str_to_ident(
+                    "timeseries_name",
+                ))),
+                op: BinaryOperator::Eq,
+                right: Box::new(Expr::Value(Value::SingleQuotedString(
+                    timeseries_name.to_string(),
+                ))),
+            }),
+            op: BinaryOperator::And,
+            right: Box::new(Expr::BinaryOp {
+                left: Box::new(Expr::Identifier(Self::str_to_ident(
+                    "field_name",
+                ))),
+                op: BinaryOperator::Eq,
+                right: Box::new(Expr::Value(Value::SingleQuotedString(
+                    field_name.to_string(),
+                ))),
+            }),
+        });
+
+        Select {
+            distinct: Some(Distinct::Distinct),
+            top: None,
+            projection,
+            into: None,
+            from: vec![from],
+            lateral_views: vec![],
+            selection,
+            group_by: vec![],
+            cluster_by: vec![],
+            distribute_by: vec![],
+            sort_by: vec![],
+            having: None,
+            named_window: vec![],
+            qualify: None,
+        }
+    }
+
+    // Build a single subquery which selects the measurements with the provided
+    // name. E.g., this creates:
+    //
+    // ```sql
+    // SELECT
+    //     timeseries_key,
+    //     timestamp,
+    //     datum
+    // FROM
+    //     measurements_{datum_type}
+    // WHERE
+    //     timeseries_name = '{timeseries_name}'
+    // ```
+    fn build_measurement_query(
+        timeseries_name: &TimeseriesName,
+        datum_type: &DatumType,
+    ) -> Select {
+        // FROM measurements_{datum_type}
+        let from = TableWithJoins {
+            relation: TableFactor::Table {
+                name: ObjectName(vec![Self::str_to_ident(
+                    &measurement_table_name(*datum_type),
+                )]),
+                alias: None,
+                args: None,
+                with_hints: vec![],
+            },
+            joins: vec![],
+        };
+
+        // SELECT timeseries_key, timestamp, [datum type columns]
+        let mut projection = vec![SelectItem::UnnamedExpr(Expr::Identifier(
+            Self::str_to_ident("timeseries_key"),
+        ))];
+        let datum_projection = Self::datum_type_to_columns(datum_type);
+        projection.extend(datum_projection.iter().map(|name| {
+            SelectItem::UnnamedExpr(Expr::Identifier(Self::str_to_ident(name)))
+        }));
+
+        // WHERE timeseries_name = '{timeseries_name}'
+        let selection = Some(Expr::BinaryOp {
+            left: Box::new(Expr::Identifier(Self::str_to_ident(
+                "timeseries_name",
+            ))),
+            op: BinaryOperator::Eq,
+            right: Box::new(Expr::Value(Value::SingleQuotedString(
+                timeseries_name.to_string(),
+            ))),
+        });
+
+        Select {
+            distinct: None,
+            top: None,
+            projection,
+            into: None,
+            from: vec![from],
+            lateral_views: vec![],
+            selection,
+            group_by: vec![],
+            cluster_by: vec![],
+            distribute_by: vec![],
+            sort_by: vec![],
+            having: None,
+            named_window: vec![],
+            qualify: None,
+        }
+    }
+
+    // Verify that the identifier is a single, concrete timeseries name.
+    fn extract_timeseries_name(
+        from: &[Ident],
+    ) -> Result<IndexSet<TimeseriesName>, OxdbError> {
+        if from.len() != 1 {
+            return unsupported!(
+                "Query must select from single named \
+                timeseries, with no database"
+            );
+        }
+        from[0]
+            .value
+            .parse()
+            .map(|n| indexmap::indexset! { n })
+            .map_err(|_| MetricsError::InvalidTimeseriesName.into())
+    }
+
+    // Process a single "table factor", the `<relation>` in `FROM <relation>`,
+    // to extract the names of the timeseries it refers to.
+    //
+    // Note this is recursive since we do support basic inner joins.
+    fn process_table_factor(
+        relation: &mut TableFactor,
+    ) -> Result<IndexSet<TimeseriesName>, OxdbError> {
+        match relation {
+            TableFactor::Table { ref mut name, args, with_hints, .. } => {
+                if args.is_some() || !with_hints.is_empty() {
+                    return unsupported!(
+                        "Table functions and hints are not supported"
+                    );
+                }
+                let timeseries_name = Self::extract_timeseries_name(&name.0)?;
+                // Rewrite the quote style to double quotes, so that the
+                // resulting actual query translates into a valid identifier for
+                // ClickHouse, naming the CTEs we'll generate later.
+                name.0[0].quote_style = Some('"');
+                Ok(timeseries_name)
+            }
+            TableFactor::Derived { lateral: false, subquery, .. } => {
+                RestrictedQuery::process_query(subquery)
+            }
+            _ => {
+                return unsupported!(
+                    "Query must select from concrete tables or subqueries on them"
+                )
+            }
+        }
+    }
+
+    // Process a parsed query, returning the named timeseries that it refers to.
+    //
+    // This is the entry-point for our query processing implementation. We take
+    // a parsed query from `sqlparser`, and extract the virtual tables
+    // (timeseries names) that we'll need to construct in order to actually run
+    // it against our database.
+    //
+    // Note that we return an _ordered set_ of the timeseries names. This is to
+    // produce the CTEs that correspond to each timeseries, but without
+    // duplicating the actual CTE.
+    fn process_query(
+        query: &mut Query,
+    ) -> Result<IndexSet<TimeseriesName>, OxdbError> {
+        // Some basic checks limiting the scope of the query.
+        if query.with.is_some()
+            || query.fetch.is_some()
+            || !query.locks.is_empty()
+        {
+            return unsupported!(
+                "CTEs, FETCH and LOCKS are not currently supported"
+            );
+        }
+        let SetExpr::Select(select) = &mut *query.body else {
+            return unsupported!("Only SELECT queries are currently supported");
+        };
+
+        // For each object we're selecting from (a table factor), process that
+        // directly, and process any JOINs it also contains.
+        let mut timeseries = IndexSet::with_capacity(select.from.len());
+        if select.from.len() > 1 {
+            return unsupported!(
+                "Query must select from a single named table, with no database"
+            );
+        }
+        if let Some(from) = select.from.iter_mut().next() {
+            timeseries.extend(Self::process_table_factor(&mut from.relation)?);
+            for join in from.joins.iter_mut() {
+                let JoinOperator::Inner(op) = &join.join_operator else {
+                    return unsupported!(
+                        "Only INNER JOINs are supported, using \
+                        explicit constraints"
+                    );
+                };
+                if matches!(op, JoinConstraint::Natural) {
+                    return unsupported!(
+                        "Only INNER JOINs are supported, using \
+                        explicit constraints"
+                    );
+                }
+                timeseries
+                    .extend(Self::process_table_factor(&mut join.relation)?);
+            }
+        }
+        Ok(timeseries)
+    }
+}
+
+static CLICKHOUSE_FUNCTION_ALLOW_LIST: OnceLock<BTreeSet<ClickHouseFunction>> =
+    OnceLock::new();
+
+#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
+pub struct ClickHouseFunction {
+    pub name: &'static str,
+    pub usage: &'static str,
+    pub description: &'static str,
+}
+
+impl std::fmt::Display for ClickHouseFunction {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}", self.name)
+    }
+}
+
+impl ClickHouseFunction {
+    fn new(usage: &'static str, description: &'static str) -> Self {
+        let name = usage.split_once('(').expect("need parentheses").0;
+        Self { name, usage, description }
+    }
+}
+
+/// Return the set of supported ClickHouse SQL functions, with a short help
+/// string.
+pub fn function_allow_list() -> &'static BTreeSet<ClickHouseFunction> {
+    CLICKHOUSE_FUNCTION_ALLOW_LIST.get_or_init(|| {
+        let mut out = BTreeSet::new();
+
+        // Core functions
+        out.insert(ClickHouseFunction::new("avg(expr)", "Arithmetic mean"));
+        out.insert(ClickHouseFunction::new("min(expr)", "Minimum value"));
+        out.insert(ClickHouseFunction::new("max(expr)", "Maximum value"));
+        out.insert(ClickHouseFunction::new("sum(expr)", "Sum values"));
+        out.insert(ClickHouseFunction::new(
+            "count(expr)",
+            "Count number of rows",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "now()",
+            "Return current timestamp",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "first_value(expr)",
+            "First value in a partition",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "last_value(expr)",
+            "Last value in a partition",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "any(expr)",
+            "First non-NULL value",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "topK(k)(expr)",
+            "Estimate K most frequent values",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "groupArray(expr)",
+            "Create an array from rows",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "argMin(arg, val)",
+            "Argument of minimum value",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "argMax(arg, val)",
+            "Argument of maximum value",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "quantileExact(quantile)(expr)",
+            "Exact quantile of inputs",
+        ));
+
+        // To support histograms, we allow the `-ForEach` combinator functions.
+ // + // See + // https://clickhouse.com/docs/en/sql-reference/aggregate-functions/combinators#-foreach, + // but briefly, this allows computing the aggregate function across + // corresponding array elements. + out.insert(ClickHouseFunction::new( + "maxForEach(array expr)", + "Max of corresponding array elements", + )); + out.insert(ClickHouseFunction::new( + "minForEach(array expr)", + "Min of corresponding array elements", + )); + out.insert(ClickHouseFunction::new( + "sumForEach(array expr)", + "Sum of corresponding array elements", + )); + out.insert(ClickHouseFunction::new( + "avgForEach(array expr)", + "Mean of corresponding array elements", + )); + + // Type conversions + // + // Note that `cast` itself will be difficult to use, because ClickHouse is + // particular about the capitalization of type names, e.g., it must be + // `cast(x as String)` not `cast(x as STRING)`. + out.insert(ClickHouseFunction::new( + "toString(x)", + "Convert to a string", + )); + out.insert(ClickHouseFunction::new("toInt8(x)", "Convert to an i8")); + out.insert(ClickHouseFunction::new("toUInt8(x)", "Convert to a u8")); + out.insert(ClickHouseFunction::new("toInt16(x)", "Convert to an i16")); + out.insert(ClickHouseFunction::new("toUInt16(x)", "Convert to a u16")); + out.insert(ClickHouseFunction::new("toInt32(x)", "Convert to an i32")); + out.insert(ClickHouseFunction::new("toUInt32(x)", "Convert to a u32")); + out.insert(ClickHouseFunction::new("toInt64(x)", "Convert to an i64")); + out.insert(ClickHouseFunction::new("toUInt64(x)", "Convert to a u64")); + out.insert(ClickHouseFunction::new( + "toFloat32(x)", + "Convert to an f32", + )); + out.insert(ClickHouseFunction::new( + "toFloat64(x)", + "Convert to an f64", + )); + out.insert(ClickHouseFunction::new( + "toDate(x)", + "Convert to a 32-bit date", + )); + out.insert(ClickHouseFunction::new( + "toDateTime(x)", + "Convert to a 32-bit date and time", + )); + out.insert(ClickHouseFunction::new( + "toDateTime64(x)", + "Convert to a 64-bit date and time", + )); + out.insert(ClickHouseFunction::new( + "toIntervalYear(x)", + "Convert to an interval in years", + )); + out.insert(ClickHouseFunction::new( + "toIntervalQuarter(x)", + "Convert to an interval in quarters", + )); + out.insert(ClickHouseFunction::new( + "toIntervalMonth(x)", + "Convert to an interval in months", + )); + out.insert(ClickHouseFunction::new( + "toIntervalWeek(x)", + "Convert to an interval in weeks", + )); + out.insert(ClickHouseFunction::new( + "toIntervalDay(x)", + "Convert to an interval in days", + )); + out.insert(ClickHouseFunction::new( + "toIntervalHour(x)", + "Convert to an interval in hours", + )); + out.insert(ClickHouseFunction::new( + "toIntervalMinute(x)", + "Convert to an interval in minutes", + )); + out.insert(ClickHouseFunction::new( + "toIntervalSecond(x)", + "Convert to an interval in seconds", + )); + + // Array functions + out.insert(ClickHouseFunction::new( + "arrayMax([func,] arr)", + "Maximum in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayMin([func,] arr)", + "Minimum in source array", + )); + out.insert(ClickHouseFunction::new( + "arraySum([func,] arr)", + "Sum of elements in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayAvg([func,] arr)", + "Mean of elements in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayMap(func, arr, ...)", + "Apply function to elements in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayReduce(func, arr, ...)", + "Aggregate elements in source array with 
a function", + )); + out.insert(ClickHouseFunction::new( + "arrayFilter(func, arr, ...)", + "Apply a lambda to source array", + )); + out.insert(ClickHouseFunction::new( + "arrayDifference(arr)", + "Difference between adjacent elements in source array", + )); + out.insert(ClickHouseFunction::new( + "indexOf(arr, x)", + "Index of `x` in source array, or 0", + )); + out.insert(ClickHouseFunction::new( + "length(arr)", + "Length of source array", + )); + + // Strings + out.insert(ClickHouseFunction::new( + "empty(x)", + "True if array or string is empty", + )); + out.insert(ClickHouseFunction::new( + "lower(x)", + "Convert a string to lowercase", + )); + out.insert(ClickHouseFunction::new( + "upper(x)", + "Convert a string to uppercase", + )); + out.insert(ClickHouseFunction::new( + "reverse(x)", + "Reverse the bytes (not chars) in a string", + )); + out.insert(ClickHouseFunction::new( + "reverseUTF8(x)", + "Reverse the characters in a string", + )); + out.insert(ClickHouseFunction::new( + "concat(s1, s2, ...)", + "Concatenate two or more strings", + )); + out.insert(ClickHouseFunction::new( + "concatWithSeparator(sep, s1, s2, ..)", + "Concatenate two or more strings with a separator", + )); + out.insert(ClickHouseFunction::new( + "substring(s, offset, len)", + "Return a substring", + )); + out.insert(ClickHouseFunction::new( + "endsWith(s, suffix)", + "True if `s` ends with `suffix`", + )); + out.insert(ClickHouseFunction::new( + "startsWith(s, prefix)", + "True if `s` starts with `prefix`", + )); + out.insert(ClickHouseFunction::new( + "splitByChar(sep, s[, limit])", + "Split on a separator, up to `limit` times", + )); + out.insert(ClickHouseFunction::new( + "splitByString(sep, s[, limit])", + "Split by a separating string, up to `limit` times", + )); + + // Time. 
+ out.insert(ClickHouseFunction::new( + "tumble(datetime, interval[, tz])", + "Nonoverlapping time windows of a specified interval", + )); + out.insert(ClickHouseFunction::new( + "toYear(date)", + "Extract year from date", + )); + out.insert(ClickHouseFunction::new( + "toQuarter(date)", + "Extract quarter from date", + )); + out.insert(ClickHouseFunction::new( + "toMonth(date)", + "Extract month from date", + )); + out.insert(ClickHouseFunction::new( + "toDayOfYear(date)", + "Index of day in its year", + )); + out.insert(ClickHouseFunction::new( + "toDayOfMonth(date)", + "Index of day in its month", + )); + out.insert(ClickHouseFunction::new( + "toDayOfWeek(date)", + "Index of day in its week", + )); + out.insert(ClickHouseFunction::new( + "toHour(date)", + "Extract hour from date", + )); + out.insert(ClickHouseFunction::new( + "toMinute(date)", + "Extract minute from date", + )); + out.insert(ClickHouseFunction::new( + "toSecond(date)", + "Extract second from date", + )); + out.insert(ClickHouseFunction::new( + "toUnixTimestamp(date)", + "Convert to UNIX timestamp", + )); + out.insert(ClickHouseFunction::new( + "toStartOfInterval(date, INTERVAL x UNIT[, tz])", + "Convert date to the start of the specified interval", + )); + out.insert(ClickHouseFunction::new( + "date_diff('unit', start, end[, tz])", + "Difference between two dates in the provided unit", + )); + out.insert(ClickHouseFunction::new( + "date_trunc('unit', date[, tz])", + "Truncate a datetime to the provided unit", + )); + out.insert(ClickHouseFunction::new( + "date_add('unit', count, date)", + "Add `count` units to `date`", + )); + out.insert(ClickHouseFunction::new( + "date_sub('unit', count, date)", + "Subtract `count` units from `date`", + )); + + // Other + out.insert(ClickHouseFunction::new( + "generateUUIDv4()", + "Generate a random UUID v4", + )); + out.insert(ClickHouseFunction::new("rand()", "Uniform random u32")); + out.insert(ClickHouseFunction::new("rand64()", "Uniform random u64")); + out.insert(ClickHouseFunction::new( + "runningDifference(arr)", + "Difference between adjacent values", + )); + out.insert(ClickHouseFunction::new( + "formatReadableSize(x)", + "Format a byte count for humans", + )); + out.insert(ClickHouseFunction::new( + "formatReadableTimeDelta(x)", + "Format an interval for humans", + )); + out.insert(ClickHouseFunction::new( + "formatReadableQuantity(x)", + "Format a quantity for humans", + )); + out + }) +} + +#[cfg(test)] +mod tests { + use super::Error; + use super::OxdbError; + use super::RestrictedQuery; + use super::SafeSql; + + #[test] + fn test_function_allow_list() { + assert!(RestrictedQuery::new("SELECT bogus()").is_err()); + assert!(matches!( + RestrictedQuery::new("SELECT bogus()").unwrap_err(), + OxdbError::Sql(Error::UnsupportedFunction { .. 
}) + )); + assert!(RestrictedQuery::new("SELECT now()").is_ok()); + } + + #[test] + fn test_ctes_are_not_supported() { + assert!(matches!( + RestrictedQuery::new("WITH nono AS (SELECT 1) SELECT * FROM NONO") + .unwrap_err(), + OxdbError::Sql(Error::UnsupportedSql(_)) + )); + } + + #[test] + fn test_multiple_statements_are_not_supported() { + assert!(matches!( + RestrictedQuery::new("SELECT 1; SELECT 2;").unwrap_err(), + OxdbError::Sql(Error::UnsupportedSql(_)) + )); + } + + #[test] + fn test_query_must_be_select_statement() { + for query in [ + "SHOW TABLES", + "DROP TABLE foo", + "CREATE TABLE foo (x Int4)", + "DESCRIBE foo", + "EXPLAIN SELECT 1", + "INSERT INTO foo VALUES (1)", + ] { + let err = RestrictedQuery::new(query).unwrap_err(); + println!("{err:?}"); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + } + + #[test] + fn test_cannot_name_database() { + let err = RestrictedQuery::new("SELECT * FROM dbname.a:a").unwrap_err(); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + + #[test] + fn test_with_comma_join_fails() { + let err = RestrictedQuery::new("SELECT * FROM a:a, b:b").unwrap_err(); + println!("{err:?}"); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + + #[test] + fn test_join_must_be_inner() { + let allowed = ["inner", ""]; + let denied = + ["natural", "cross", "left outer", "right outer", "full outer"]; + for join in allowed.iter() { + RestrictedQuery::new(format!("SELECT * FROM a:a {join} JOIN b:b")) + .unwrap_or_else(|_| { + panic!("Should be able to use join type '{join}'") + }); + } + for join in denied.iter() { + let sql = format!("SELECT * FROM a:a {join} JOIN b:b"); + println!("{sql}"); + let err = RestrictedQuery::new(&sql).expect_err( + format!("Should not be able to use join type '{join}'") + .as_str(), + ); + println!("{err:?}"); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + } + + #[test] + fn test_allow_limit_offset() { + let sql = "SELECT * FROM a:b LIMIT 10 OFFSET 10;"; + println!("{sql}"); + RestrictedQuery::new(&sql) + .expect("Should be able to use LIMIT / OFFSET queries"); + } + + #[test] + fn test_require_table_is_timeseries_name() { + assert!(RestrictedQuery::new("SELECT * FROM a:b").is_ok()); + let bad = ["table", "db.table", "no:no:no"]; + for each in bad.iter() { + let sql = format!("SELECT * FROM {each}"); + RestrictedQuery::new(&sql) + .expect_err("Should have validated timeseries name"); + } + } + + #[test] + fn test_allow_subqueries() { + assert!(RestrictedQuery::new("SELECT * FROM (SELECT 1);").is_ok()); + } + + #[test] + fn test_query_with_multiple_timeseries_generates_one_cte() { + let query = "SELECT * FROM a:b JOIN a:b USING (timeseries_key);"; + let res = RestrictedQuery::new(&query).unwrap(); + assert_eq!(res.timeseries.len(), 1); + } + + #[test] + fn test_safe_sql_does_not_modify_original_alias() { + let query = "SELECT * FROM a:b AS ASOF JOIN a:b"; + let query_with_quotes = "SELECT * FROM \"a:b\" AS ASOF JOIN \"a:b\""; + let safe = SafeSql::new(query); + let rewritten = safe.safe_sql(); + println!("{query}"); + println!("{query_with_quotes}"); + println!("{rewritten}"); + + // Check that we've written out the same query words, ignoring + // whitespace. 
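The `test_require_table_is_timeseries_name` cases above rely on the `target:metric` grammar enforced by `TIMESERIES_NAME_REGEX`, which this patch also anchors with `^...$` in `oximeter/src/schema.rs`; unanchored, the pattern would match a substring of an invalid name like `x.a:b` and wrongly accept it. A small sketch of the difference, using the `regex` crate:

```rust
use regex::Regex;

fn main() {
    // The target/metric grammar from TIMESERIES_NAME_REGEX, without anchors.
    let pattern =
        r"(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*):(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*)";

    // Unanchored, `is_match` succeeds if the pattern matches anywhere, so
    // the invalid name `x.a:b` is accepted via its `a:b` substring.
    let unanchored = Regex::new(pattern).unwrap();
    assert!(unanchored.is_match("x.a:b"));

    // Anchored, the whole string must be a well-formed `target:metric`.
    let anchored = Regex::new(&format!("^{pattern}$")).unwrap();
    assert!(!anchored.is_match("x.a:b"));
    assert!(anchored.is_match("service:request_latency"));
}
```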
+ let words = query_with_quotes + .split_ascii_whitespace() + .rev() + .collect::>(); + let rewritten_words = rewritten + .split_ascii_whitespace() + .rev() + .take(words.len()) + .collect::>(); + assert_eq!(words, rewritten_words); + } +} diff --git a/oximeter/db/test-output/sql/00/query.sql b/oximeter/db/test-output/sql/00/query.sql new file mode 100644 index 0000000000..e0ac49d1ec --- /dev/null +++ b/oximeter/db/test-output/sql/00/query.sql @@ -0,0 +1 @@ +SELECT 1; diff --git a/oximeter/db/test-output/sql/00/result.txt b/oximeter/db/test-output/sql/00/result.txt new file mode 100644 index 0000000000..925e298e86 --- /dev/null +++ b/oximeter/db/test-output/sql/00/result.txt @@ -0,0 +1,10 @@ +{ + "column_names": [ + "1" + ], + "rows": [ + [ + 1 + ] + ] +} \ No newline at end of file diff --git a/oximeter/db/test-output/sql/01/query.sql b/oximeter/db/test-output/sql/01/query.sql new file mode 100644 index 0000000000..f3e5549e7f --- /dev/null +++ b/oximeter/db/test-output/sql/01/query.sql @@ -0,0 +1 @@ +SELECT 1 + 1 AS total; diff --git a/oximeter/db/test-output/sql/01/result.txt b/oximeter/db/test-output/sql/01/result.txt new file mode 100644 index 0000000000..ee17f9993e --- /dev/null +++ b/oximeter/db/test-output/sql/01/result.txt @@ -0,0 +1,10 @@ +{ + "column_names": [ + "total" + ], + "rows": [ + [ + 2 + ] + ] +} \ No newline at end of file diff --git a/oximeter/db/test-output/sql/02/query.sql b/oximeter/db/test-output/sql/02/query.sql new file mode 100644 index 0000000000..cd16a883aa --- /dev/null +++ b/oximeter/db/test-output/sql/02/query.sql @@ -0,0 +1 @@ +SELECT count() FROM service:request_latency WHERE route = '/a'; diff --git a/oximeter/db/test-output/sql/02/result.txt b/oximeter/db/test-output/sql/02/result.txt new file mode 100644 index 0000000000..7bae246ae8 --- /dev/null +++ b/oximeter/db/test-output/sql/02/result.txt @@ -0,0 +1,10 @@ +{ + "column_names": [ + "count()" + ], + "rows": [ + [ + 12 + ] + ] +} \ No newline at end of file diff --git a/oximeter/db/test-output/sql/03/query.sql b/oximeter/db/test-output/sql/03/query.sql new file mode 100644 index 0000000000..9d043eda5b --- /dev/null +++ b/oximeter/db/test-output/sql/03/query.sql @@ -0,0 +1,4 @@ +SELECT + count() AS total +FROM service:request_latency +GROUP BY name, id, route, method, status_code; diff --git a/oximeter/db/test-output/sql/03/result.txt b/oximeter/db/test-output/sql/03/result.txt new file mode 100644 index 0000000000..246b8a224e --- /dev/null +++ b/oximeter/db/test-output/sql/03/result.txt @@ -0,0 +1,43 @@ +{ + "column_names": [ + "total" + ], + "rows": [ + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ], + [ + 2 + ] + ] +} \ No newline at end of file diff --git a/oximeter/db/test-output/sql/04/query.sql b/oximeter/db/test-output/sql/04/query.sql new file mode 100644 index 0000000000..a276475a0d --- /dev/null +++ b/oximeter/db/test-output/sql/04/query.sql @@ -0,0 +1,5 @@ +SELECT + timeseries_key, + count() AS total +FROM service:request_latency +GROUP BY timeseries_key; diff --git a/oximeter/db/test-output/sql/04/result.txt b/oximeter/db/test-output/sql/04/result.txt new file mode 100644 index 0000000000..4eca1fd93d --- /dev/null +++ b/oximeter/db/test-output/sql/04/result.txt @@ -0,0 +1,56 @@ +{ + "column_names": [ + "timeseries_key", + "total" + ], + "rows": [ + [ + 1249464505628069370, + 2 + ], + [ + 1201872630192423018, + 2 + ], + [ + 1490072383288995413, + 2 + ], + [ + 4845785484328932020, + 2 + ], + [ + 
16162802647654680800, + 2 + ], + [ + 9308844330114997943, + 2 + ], + [ + 5233273748839477731, + 2 + ], + [ + 12759963114254845848, + 2 + ], + [ + 8677807063017961056, + 2 + ], + [ + 17069599562714970297, + 2 + ], + [ + 1477351355909737762, + 2 + ], + [ + 16473879070749258520, + 2 + ] + ] +} \ No newline at end of file diff --git a/oximeter/oximeter/src/schema.rs b/oximeter/oximeter/src/schema.rs index b6953fda52..2a577fc8f1 100644 --- a/oximeter/oximeter/src/schema.rs +++ b/oximeter/oximeter/src/schema.rs @@ -261,7 +261,7 @@ impl PartialEq for TimeseriesSchema { // // That describes the target/metric name, and the timeseries is two of those, joined with ':'. const TIMESERIES_NAME_REGEX: &str = - "(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*):(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*)"; + "^(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*):(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*)$"; /// A set of timeseries schema, useful for testing changes to targets or /// metrics. @@ -502,6 +502,7 @@ mod tests { assert!(TimeseriesName::try_from(":b").is_err()); assert!(TimeseriesName::try_from("a:").is_err()); assert!(TimeseriesName::try_from("123").is_err()); + assert!(TimeseriesName::try_from("x.a:b").is_err()); } #[derive(Target)] diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index 3d74bec72c..eff5c399e3 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -330,6 +330,11 @@ impl DatumType { | DatumType::HistogramF64 ) } + + /// Return `true` if this datum type is a histogram, and `false` otherwise. + pub const fn is_histogram(&self) -> bool { + matches!(self, DatumType::HistogramF64 | DatumType::HistogramI64) + } } impl std::fmt::Display for DatumType { diff --git a/wicket-dbg/Cargo.toml b/wicket-dbg/Cargo.toml index f9047297af..f42ed335c8 100644 --- a/wicket-dbg/Cargo.toml +++ b/wicket-dbg/Cargo.toml @@ -20,7 +20,7 @@ tokio = { workspace = true, features = ["full"] } wicket.workspace = true # used only by wicket-dbg binary -reedline = "0.26.0" +reedline.workspace = true omicron-workspace-hack.workspace = true [[bin]] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index aa2461b980..01cd1bdb68 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -33,7 +33,6 @@ console = { version = "0.15.7" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } -crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } @@ -67,6 +66,7 @@ libc = { version = "0.2.151", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } +nom = { version = "7.1.3" } num-bigint = { version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } @@ -136,7 +136,6 @@ console = { version = "0.15.7" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } 
crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } -crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } @@ -170,6 +169,7 @@ libc = { version = "0.2.151", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } +nom = { version = "7.1.3" } num-bigint = { version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } From 180616e87c6d838169ecb04bde6e0640dc654f74 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 12 Dec 2023 18:45:07 -0800 Subject: [PATCH 095/186] Let start saga handle unwinding from sled agent instance PUT errors (#4682) Remove `Nexus::handle_instance_put_result`. In its place, make Nexus instance routines that invoke sled agent instance PUT endpoints decide how to handle their own errors, and be more explicit about the specific kinds of errors these operations can produce. Use this flexibility to allow the instance start and migrate sagas handle failure to start a new instance (or to start a migration target) by unwinding instead of having to reckon with callee-defined side effects of failing a call to sled agent. Other callers continue to do what `handle_instance_put_result` did. Improve some tests: - Add a test variation to reproduce #4662. To support this, teach the simulated sled agent to let callers inject failure into calls to ensure an instance's state. - Fix up a bit of simulated sled agent logic that was unfaithful to the real sled agent's behavior and that caused the new test to pass when it should have failed. - Make sure that start saga tests that unwind explicitly verify that unwinding the saga doesn't leak provisioning counters. Tests: Cargo tests including the new start saga variation; smoke tested instance start/stop/reboot on a dev cluster. Fixes #4662. --- nexus/src/app/instance.rs | 393 ++++++++++++++---------- nexus/src/app/sagas/instance_create.rs | 74 +---- nexus/src/app/sagas/instance_migrate.rs | 69 +++-- nexus/src/app/sagas/instance_start.rs | 212 +++++++++++-- nexus/src/app/sagas/test_helpers.rs | 66 +++- sled-agent/src/sim/collection.rs | 10 + sled-agent/src/sim/sled_agent.rs | 22 +- 7 files changed, 569 insertions(+), 277 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 987a8ac794..93386a66d0 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -61,6 +61,83 @@ use uuid::Uuid; const MAX_KEYS_PER_INSTANCE: u32 = 8; +type SledAgentClientError = + sled_agent_client::Error; + +// Newtype wrapper to avoid the orphan type rule. 
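The orphan rule referenced in that comment forbids implementing a foreign trait (such as `From`) for a foreign type; since both the Progenitor-generated client error and `omicron_common::api::external::Error` live in other crates, a locally owned newtype is the standard workaround. A minimal, self-contained illustration, where the modules merely stand in for separate crates:

```rust
// Stand-ins for items defined in other crates. In the real code these are
// the sled-agent client's error type and the external API error type.
mod client {
    #[derive(Debug)]
    pub struct ClientError(pub String);
}
mod api {
    #[derive(Debug)]
    pub struct ApiError(pub String);
}

// If `client` and `api` really were separate crates, writing
// `impl From<client::ClientError> for api::ApiError` here would be rejected:
// the trait and both types would all be foreign. Wrapping the client error
// in a newtype we own makes both impls below legal.
#[derive(Debug)]
pub struct PutError(pub client::ClientError);

impl From<client::ClientError> for PutError {
    fn from(value: client::ClientError) -> Self {
        Self(value)
    }
}

impl From<PutError> for api::ApiError {
    fn from(value: PutError) -> Self {
        let client::ClientError(msg) = value.0;
        api::ApiError(msg)
    }
}

fn main() {
    let wrapped: PutError = client::ClientError("timeout".into()).into();
    let api_error: api::ApiError = wrapped.into();
    println!("{api_error:?}");
}
```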
+#[derive(Debug)]
+pub struct SledAgentInstancePutError(pub SledAgentClientError);
+
+impl std::fmt::Display for SledAgentInstancePutError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl From<SledAgentClientError> for SledAgentInstancePutError {
+    fn from(value: SledAgentClientError) -> Self {
+        Self(value)
+    }
+}
+
+impl From<SledAgentInstancePutError> for omicron_common::api::external::Error {
+    fn from(value: SledAgentInstancePutError) -> Self {
+        value.0.into()
+    }
+}
+
+impl SledAgentInstancePutError {
+    /// Returns `true` if this error is of a class that indicates that Nexus
+    /// cannot assume anything about the health of the instance or its sled.
+    pub fn instance_unhealthy(&self) -> bool {
+        // TODO(#3238) TODO(#4226) For compatibility, this logic is lifted from
+        // the From impl that converts Progenitor client errors to
+        // `omicron_common::api::external::Error`s and from previous logic in
+        // this module that inferred instance health from those converted
+        // errors. In particular, some of the outer Progenitor client error
+        // variants (e.g. CommunicationError) can indicate transient conditions
+        // that aren't really fatal to an instance and don't otherwise indicate
+        // that it's unhealthy.
+        //
+        // To match old behavior until this is revisited, however, treat all
+        // Progenitor errors except for explicit error responses as signs of an
+        // unhealthy instance, and then only treat an instance as healthy if its
+        // sled returned a 400-level status code.
+        match &self.0 {
+            progenitor_client::Error::ErrorResponse(rv) => {
+                !rv.status().is_client_error()
+            }
+            _ => true,
+        }
+    }
+}
+
+/// An error that can be returned from an operation that changes the state of an
+/// instance on a specific sled.
+#[derive(Debug, thiserror::Error)]
+pub enum InstanceStateChangeError {
+    /// Sled agent returned an error from one of its instance endpoints.
+    #[error("sled agent client error: {0}")]
+    SledAgent(SledAgentInstancePutError),
+
+    /// Some other error occurred outside of the attempt to communicate with
+    /// sled agent.
+    #[error(transparent)]
+    Other(#[from] omicron_common::api::external::Error),
+}
+
+// Allow direct conversion of instance state change errors into API errors for
+// callers who don't care about the specific reason the update failed and just
+// need to return an API error.
+impl From<InstanceStateChangeError> for omicron_common::api::external::Error {
+    fn from(value: InstanceStateChangeError) -> Self {
+        match value {
+            InstanceStateChangeError::SledAgent(e) => e.into(),
+            InstanceStateChangeError::Other(e) => e,
+        }
+    }
+}
+
 /// The kinds of state changes that can be requested of an instance's current
 /// VMM (i.e. the VMM pointed to by the instance's `propolis_id` field).
 pub(crate) enum InstanceStateChangeRequest {
@@ -438,19 +515,30 @@ impl super::Nexus {
                 },
             )
             .await
-            .map(|res| Some(res.into_inner()));
+            .map(|res| Some(res.into_inner().into()))
+            .map_err(|e| SledAgentInstancePutError(e));
 
         // Write the updated instance runtime state back to CRDB. If this
         // outright fails, this operation fails. If the operation nominally
         // succeeds but nothing was updated, this action is outdated and the
         // caller should not proceed with migration.
-        let (updated, _) = self
-            .handle_instance_put_result(
-                &instance_id,
-                prev_instance_runtime,
-                instance_put_result.map(|state| state.map(Into::into)),
-            )
-            .await?;
+        let (updated, _) = match instance_put_result {
+            Ok(state) => {
+                self.write_returned_instance_state(&instance_id, state).await?
+ } + Err(e) => { + if e.instance_unhealthy() { + let _ = self + .mark_instance_failed( + &instance_id, + &prev_instance_runtime, + &e, + ) + .await; + } + return Err(e.into()); + } + }; if updated { Ok(self @@ -498,14 +586,26 @@ impl super::Nexus { }, ) .await - .map(|res| Some(res.into_inner())); + .map(|res| Some(res.into_inner().into())) + .map_err(|e| SledAgentInstancePutError(e)); - self.handle_instance_put_result( - &instance_id, - prev_instance_runtime, - instance_put_result.map(|state| state.map(Into::into)), - ) - .await?; + match instance_put_result { + Ok(state) => { + self.write_returned_instance_state(&instance_id, state).await?; + } + Err(e) => { + if e.instance_unhealthy() { + let _ = self + .mark_instance_failed( + &instance_id, + &prev_instance_runtime, + &e, + ) + .await; + } + return Err(e.into()); + } + } Ok(()) } @@ -631,22 +731,18 @@ impl super::Nexus { opctx: &OpContext, authz_instance: &authz::Instance, sled_id: &Uuid, - prev_instance_runtime: &db::model::InstanceRuntimeState, - ) -> Result<(), Error> { + ) -> Result, InstanceStateChangeError> + { opctx.authorize(authz::Action::Modify, authz_instance).await?; let sa = self.sled_client(&sled_id).await?; - let result = sa - .instance_unregister(&authz_instance.id()) + sa.instance_unregister(&authz_instance.id()) .await - .map(|res| res.into_inner().updated_runtime); - - self.handle_instance_put_result( - &authz_instance.id(), - prev_instance_runtime, - result.map(|state| state.map(Into::into)), - ) - .await - .map(|_| ()) + .map(|res| res.into_inner().updated_runtime.map(Into::into)) + .map_err(|e| { + InstanceStateChangeError::SledAgent(SledAgentInstancePutError( + e, + )) + }) } /// Determines the action to take on an instance's active VMM given a @@ -799,7 +895,7 @@ impl super::Nexus { prev_instance_state: &db::model::Instance, prev_vmm_state: &Option, requested: InstanceStateChangeRequest, - ) -> Result<(), Error> { + ) -> Result<(), InstanceStateChangeError> { opctx.authorize(authz::Action::Modify, authz_instance).await?; let instance_id = authz_instance.id(); @@ -817,16 +913,23 @@ impl super::Nexus { &InstancePutStateBody { state: requested.into() }, ) .await - .map(|res| res.into_inner().updated_runtime) - .map(|state| state.map(Into::into)); + .map(|res| res.into_inner().updated_runtime.map(Into::into)) + .map_err(|e| SledAgentInstancePutError(e)); - self.handle_instance_put_result( - &instance_id, - prev_instance_state.runtime(), - instance_put_result, - ) - .await - .map(|_| ()) + // If the operation succeeded, write the instance state back, + // returning any subsequent errors that occurred during that + // write. + // + // If the operation failed, kick the sled agent error back up to + // the caller to let it decide how to handle it. 
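How callers decide what to do with a kicked-back error generally comes down to `instance_unhealthy()` from earlier in this file: an explicit 4xx response means sled agent rejected the request but the instance is presumed fine, while 5xx responses and transport-level failures are conservatively treated as instance-threatening. A simplified, standalone model of that rule (the real method matches on a Progenitor client error rather than a bare status code):

```rust
/// Simplified stand-in for the classification in `instance_unhealthy()`.
/// `status` is `Some(code)` for an explicit error response from sled agent,
/// and `None` for transport-level failures (timeouts, connection errors).
fn instance_unhealthy(status: Option<u16>) -> bool {
    match status {
        // An explicit 4xx means sled agent rejected the request; the
        // instance itself is assumed to be fine.
        Some(code) if (400..500).contains(&code) => false,
        // 5xx responses, and anything that never produced a response, are
        // treated as signs the instance (or its sled) may be unhealthy.
        Some(_) => true,
        None => true,
    }
}

fn main() {
    assert!(!instance_unhealthy(Some(400))); // bad request: instance OK
    assert!(instance_unhealthy(Some(500))); // server error: assume unhealthy
    assert!(instance_unhealthy(None)); // no response: assume unhealthy
}
```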
+ match instance_put_result { + Ok(state) => self + .write_returned_instance_state(&instance_id, state) + .await + .map(|_| ()) + .map_err(Into::into), + Err(e) => Err(InstanceStateChangeError::SledAgent(e)), + } } } } @@ -1046,143 +1149,117 @@ impl super::Nexus { }, ) .await - .map(|res| Some(res.into_inner())); + .map(|res| Some(res.into_inner().into())) + .map_err(|e| SledAgentInstancePutError(e)); - self.handle_instance_put_result( - &db_instance.id(), - db_instance.runtime(), - instance_register_result.map(|state| state.map(Into::into)), - ) - .await - .map(|_| ()) + match instance_register_result { + Ok(state) => { + self.write_returned_instance_state(&db_instance.id(), state) + .await?; + } + Err(e) => { + if e.instance_unhealthy() { + let _ = self + .mark_instance_failed( + &db_instance.id(), + db_instance.runtime(), + &e, + ) + .await; + } + return Err(e.into()); + } + } + + Ok(()) } - /// Updates an instance's CRDB record based on the result of a call to sled - /// agent that tried to update the instance's state. - /// - /// # Parameters - /// - /// - `db_instance`: The CRDB instance record observed by the caller before - /// it attempted to update the instance's state. - /// - `result`: The result of the relevant sled agent operation. If this is - /// `Ok`, the payload is the updated instance runtime state returned from - /// sled agent, if there was one. + /// Takes an updated instance state returned from a call to sled agent and + /// writes it back to the database. /// /// # Return value /// - /// - `Ok(true)` if the caller supplied an updated instance record and this - /// routine successfully wrote it to CRDB. - /// - `Ok(false)` if the sled agent call succeeded, but this routine did not - /// update CRDB. - /// This can happen either because sled agent didn't return an updated - /// record or because the updated record was superseded by a state update - /// with a more advanced generation number. - /// - `Err` if the sled agent operation failed or this routine received an - /// error while trying to update CRDB. - async fn handle_instance_put_result( + /// - `Ok((instance_updated, vmm_updated))` if no failures occurred. The + /// tuple fields indicate which database records (if any) were updated. + /// Note that it is possible for sled agent not to return an updated + /// instance state from a particular API call. In that case, the `state` + /// parameter is `None` and this routine returns `Ok((false, false))`. + /// - `Err` if an error occurred while writing state to the database. A + /// database operation that succeeds but doesn't update anything (e.g. + /// owing to an outdated generation number) will return `Ok`. 
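As a sketch of how a caller consumes that contract (self-contained and illustrative, not the real signature): the migration path above proceeds only when the instance half of the tuple reports an actual update, since a no-op write usually means the caller's view of the instance was stale.

```rust
/// Illustrative stand-in for consuming the `(instance_updated, vmm_updated)`
/// tuple documented above; the error type is simplified to `String`.
fn should_proceed(
    update_result: Result<(bool, bool), String>,
) -> Result<bool, String> {
    let (instance_updated, _vmm_updated) = update_result?;
    // `false` here is not an error: it typically means a write with a newer
    // generation number already superseded this one, so the caller stops.
    Ok(instance_updated)
}

fn main() {
    assert_eq!(should_proceed(Ok((true, true))), Ok(true));
    assert_eq!(should_proceed(Ok((false, false))), Ok(false));
}
```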
+ async fn write_returned_instance_state( &self, instance_id: &Uuid, - prev_instance_runtime: &db::model::InstanceRuntimeState, - result: Result< - Option, - sled_agent_client::Error, - >, + state: Option, ) -> Result<(bool, bool), Error> { - slog::debug!(&self.log, "Handling sled agent instance PUT result"; + slog::debug!(&self.log, + "writing instance state returned from sled agent"; "instance_id" => %instance_id, - "result" => ?result); - - match result { - Ok(Some(new_state)) => { - let update_result = self - .db_datastore - .instance_and_vmm_update_runtime( - instance_id, - &new_state.instance_state.into(), - &new_state.propolis_id, - &new_state.vmm_state.into(), - ) - .await; + "new_state" => ?state); - slog::debug!(&self.log, - "Attempted DB update after instance PUT"; - "instance_id" => %instance_id, - "propolis_id" => %new_state.propolis_id, - "result" => ?update_result); - - update_result - } - Ok(None) => Ok((false, false)), - Err(e) => { - // The sled-agent has told us that it can't do what we - // requested, but does that mean a failure? One example would be - // if we try to "reboot" a stopped instance. That shouldn't - // transition the instance to failed. But if the sled-agent - // *can't* boot a stopped instance, that should transition - // to failed. - // - // Without a richer error type, let the sled-agent tell Nexus - // what to do with status codes. - error!(self.log, "received error from instance PUT"; - "instance_id" => %instance_id, - "error" => ?e); + if let Some(state) = state { + let update_result = self + .db_datastore + .instance_and_vmm_update_runtime( + instance_id, + &state.instance_state.into(), + &state.propolis_id, + &state.vmm_state.into(), + ) + .await; - // Convert to the Omicron API error type. - // - // TODO(#3238): This is an extremely lossy conversion: if the - // operation failed without getting a response from sled agent, - // this unconditionally converts to Error::InternalError. - let e = e.into(); - - match &e { - // Bad request shouldn't change the instance state. - Error::InvalidRequest { .. } => Err(e), - - // Internal server error (or anything else) should change - // the instance state to failed, we don't know what state - // the instance is in. - // - // TODO(#4226): This logic needs to be revisited: - // - Some errors that don't get classified as - // Error::InvalidRequest (timeouts, disconnections due to - // network weather, etc.) are not necessarily fatal to the - // instance and shouldn't mark it as Failed. - // - If the instance still has a running VMM, this operation - // won't terminate it or reclaim its resources. (The - // resources will be reclaimed if the sled later reports - // that the VMM is gone, however.) - _ => { - let new_runtime = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new( - InstanceState::Failed, - ), + slog::debug!(&self.log, + "attempted to write instance state from sled agent"; + "instance_id" => %instance_id, + "propolis_id" => %state.propolis_id, + "result" => ?update_result); - // TODO(#4226): Clearing the Propolis ID is required - // to allow the instance to be deleted, but this - // doesn't actually terminate the VMM (see above). - propolis_id: None, - gen: prev_instance_runtime.gen.next().into(), - ..prev_instance_runtime.clone() - }; + update_result + } else { + Ok((false, false)) + } + } - // XXX what if this fails? 
- let result = self - .db_datastore - .instance_update_runtime(&instance_id, &new_runtime) - .await; + /// Attempts to move an instance from `prev_instance_runtime` to the + /// `Failed` state in response to an error returned from a call to a sled + /// agent instance API, supplied in `reason`. + pub(crate) async fn mark_instance_failed( + &self, + instance_id: &Uuid, + prev_instance_runtime: &db::model::InstanceRuntimeState, + reason: &SledAgentInstancePutError, + ) -> Result<(), Error> { + error!(self.log, "marking instance failed due to sled agent API error"; + "instance_id" => %instance_id, + "error" => ?reason); + + let new_runtime = db::model::InstanceRuntimeState { + nexus_state: db::model::InstanceState::new(InstanceState::Failed), + + // TODO(#4226): Clearing the Propolis ID is required to allow the + // instance to be deleted, but this doesn't actually terminate the + // VMM. + propolis_id: None, + gen: prev_instance_runtime.gen.next().into(), + ..prev_instance_runtime.clone() + }; - error!( - self.log, - "attempted to set instance to Failed after bad put"; + match self + .db_datastore + .instance_update_runtime(&instance_id, &new_runtime) + .await + { + Ok(_) => info!(self.log, "marked instance as Failed"; + "instance_id" => %instance_id), + // XXX: It's not clear what to do with this error; should it be + // bubbled back up to the caller? + Err(e) => error!(self.log, + "failed to write Failed instance state to DB"; "instance_id" => %instance_id, - "result" => ?result, - ); - - Err(e) - } - } - } + "error" => ?e), } + + Ok(()) } /// Lists disks attached to the instance. diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 5149825842..fd86e2052a 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -903,8 +903,7 @@ pub mod test { }; use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use diesel::{ - BoolExpressionMethods, ExpressionMethods, OptionalExtension, QueryDsl, - SelectableHelper, + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, }; use dropshot::test_util::ClientTestContext; use nexus_db_queries::authn::saga::Serialized; @@ -1073,68 +1072,6 @@ pub mod test { .unwrap() } - async fn no_virtual_provisioning_resource_records_exist( - datastore: &DataStore, - ) -> bool { - use nexus_db_queries::db::model::VirtualProvisioningResource; - use nexus_db_queries::db::schema::virtual_provisioning_resource::dsl; - - let conn = datastore.pool_connection_for_tests().await.unwrap(); - - datastore - .transaction_retry_wrapper("no_virtual_provisioning_resource_records_exist") - .transaction(&conn, |conn| async move { - conn - .batch_execute_async(nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL) - .await - .unwrap(); - - Ok( - dsl::virtual_provisioning_resource - .filter(dsl::resource_type.eq(nexus_db_queries::db::model::ResourceTypeProvisioned::Instance.to_string())) - .select(VirtualProvisioningResource::as_select()) - .get_results_async::(&conn) - .await - .unwrap() - .is_empty() - ) - }).await.unwrap() - } - - async fn no_virtual_provisioning_collection_records_using_instances( - datastore: &DataStore, - ) -> bool { - use nexus_db_queries::db::model::VirtualProvisioningCollection; - use nexus_db_queries::db::schema::virtual_provisioning_collection::dsl; - - let conn = datastore.pool_connection_for_tests().await.unwrap(); - - datastore - .transaction_retry_wrapper( - "no_virtual_provisioning_collection_records_using_instances", - ) - .transaction(&conn, |conn| async 
move { - conn.batch_execute_async( - nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, - ) - .await - .unwrap(); - Ok(dsl::virtual_provisioning_collection - .filter( - dsl::cpus_provisioned - .ne(0) - .or(dsl::ram_provisioned.ne(0)), - ) - .select(VirtualProvisioningCollection::as_select()) - .get_results_async::(&conn) - .await - .unwrap() - .is_empty()) - }) - .await - .unwrap() - } - async fn disk_is_detached(datastore: &DataStore) -> bool { use nexus_db_queries::db::model::Disk; use nexus_db_queries::db::schema::disk::dsl; @@ -1170,11 +1107,14 @@ pub mod test { assert!(no_external_ip_records_exist(datastore).await); assert!(no_sled_resource_instance_records_exist(datastore).await); assert!( - no_virtual_provisioning_resource_records_exist(datastore).await + test_helpers::no_virtual_provisioning_resource_records_exist( + cptestctx + ) + .await ); assert!( - no_virtual_provisioning_collection_records_using_instances( - datastore + test_helpers::no_virtual_provisioning_collection_records_using_instances( + cptestctx ) .await ); diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index d32a20bc40..7a417a5781 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -3,7 +3,9 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; -use crate::app::instance::InstanceStateChangeRequest; +use crate::app::instance::{ + InstanceStateChangeError, InstanceStateChangeRequest, +}; use crate::app::sagas::{ declare_saga_actions, instance_common::allocate_sled_ipv6, }; @@ -387,28 +389,26 @@ async fn sim_ensure_destination_propolis_undo( "prev_runtime_state" => ?db_instance.runtime()); // Ensure that the destination sled has no Propolis matching the description - // the saga previously generated. - // - // Sled agent guarantees that if an instance is unregistered from a sled - // that does not believe it holds the "active" Propolis for the instance, - // then the sled's copy of the instance record will not change during - // unregistration. This precondition always holds here because the "start - // migration" step is not allowed to unwind once migration has possibly - // started. Not changing the instance is important here because the next - // undo step (clearing migration IDs) needs to advance the instance's - // generation number to succeed. - osagactx + // the saga previously generated. If this succeeds, or if it fails because + // the destination sled no longer knows about this instance, allow the rest + // of unwind to take care of cleaning up the migration IDs in the instance + // record. Otherwise the unwind has failed and manual intervention is + // needed. + match osagactx .nexus() - .instance_ensure_unregistered( - &opctx, - &authz_instance, - &dst_sled_id, - db_instance.runtime(), - ) + .instance_ensure_unregistered(&opctx, &authz_instance, &dst_sled_id) .await - .map_err(ActionError::action_failed)?; - - Ok(()) + { + Ok(_) => Ok(()), + Err(InstanceStateChangeError::SledAgent(inner)) => { + if !inner.instance_unhealthy() { + Ok(()) + } else { + Err(inner.0.into()) + } + } + Err(e) => Err(e.into()), + } } async fn sim_instance_migrate( @@ -454,7 +454,7 @@ async fn sim_instance_migrate( // // Possibly sled agent can help with this by using state or Propolis // generation numbers to filter out stale destruction requests. 
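Generation numbers are the natural tool for the filtering suggested in that comment: each state update carries the generation it was computed against, and the recipient applies only strictly newer ones. A minimal standalone sketch of that guard (field names illustrative, not the actual runtime-state schema):

```rust
#[derive(Debug, PartialEq)]
struct RuntimeState {
    generation: u64,
    running: bool,
}

/// Apply `incoming` only if it is strictly newer than the current state;
/// stale requests (equal or older generation) are dropped.
fn apply_if_newer(current: &mut RuntimeState, incoming: RuntimeState) -> bool {
    if incoming.generation > current.generation {
        *current = incoming;
        true
    } else {
        false
    }
}

fn main() {
    let mut state = RuntimeState { generation: 5, running: true };
    // A stale destruction request computed against an older generation.
    assert!(!apply_if_newer(&mut state, RuntimeState { generation: 4, running: false }));
    assert!(state.running);
    // A genuinely newer update is applied.
    assert!(apply_if_newer(&mut state, RuntimeState { generation: 6, running: false }));
    assert!(!state.running);
}
```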
- osagactx + match osagactx .nexus() .instance_request_state( &opctx, @@ -469,9 +469,30 @@ async fn sim_instance_migrate( ), ) .await - .map_err(ActionError::action_failed)?; + { + Ok(_) => Ok(()), + // Failure to initiate migration to a specific target doesn't entail + // that the entire instance has failed, so handle errors by unwinding + // the saga without otherwise touching the instance's state. + Err(InstanceStateChangeError::SledAgent(inner)) => { + info!(osagactx.log(), + "migration saga: sled agent failed to start migration"; + "instance_id" => %db_instance.id(), + "error" => ?inner); + + Err(ActionError::action_failed( + omicron_common::api::external::Error::from(inner), + )) + } + Err(InstanceStateChangeError::Other(inner)) => { + info!(osagactx.log(), + "migration saga: internal error changing instance state"; + "instance_id" => %db_instance.id(), + "error" => ?inner); - Ok(()) + Err(ActionError::action_failed(inner)) + } + } } #[cfg(test)] diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 76773d6369..e6717b0164 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -10,6 +10,7 @@ use super::{ instance_common::allocate_sled_ipv6, NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, }; +use crate::app::instance::InstanceStateChangeError; use crate::app::sagas::declare_saga_actions; use chrono::Utc; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; @@ -52,11 +53,6 @@ declare_saga_actions! { - sis_move_to_starting_undo } - ADD_VIRTUAL_RESOURCES -> "virtual_resources" { - + sis_account_virtual_resources - - sis_account_virtual_resources_undo - } - // TODO(#3879) This can be replaced with an action that triggers the NAT RPW // once such an RPW is available. DPD_ENSURE -> "dpd_ensure" { @@ -74,6 +70,17 @@ declare_saga_actions! { - sis_ensure_registered_undo } + // Only account for the instance's resource consumption when the saga is on + // the brink of actually starting it. This allows prior steps' undo actions + // to change the instance's generation number if warranted (e.g. by moving + // the instance to the Failed state) without disrupting this step's undo + // action (which depends on the instance bearing the same generation number + // at undo time that it had at resource accounting time). + ADD_VIRTUAL_RESOURCES -> "virtual_resources" { + + sis_account_virtual_resources + - sis_account_virtual_resources_undo + } + ENSURE_RUNNING -> "ensure_running" { + sis_ensure_running } @@ -103,10 +110,10 @@ impl NexusSaga for SagaInstanceStart { builder.append(alloc_propolis_ip_action()); builder.append(create_vmm_record_action()); builder.append(mark_as_starting_action()); - builder.append(add_virtual_resources_action()); builder.append(dpd_ensure_action()); builder.append(v2p_ensure_action()); builder.append(ensure_registered_action()); + builder.append(add_virtual_resources_action()); builder.append(ensure_running_action()); Ok(builder.build()?) } @@ -575,18 +582,87 @@ async fn sis_ensure_registered_undo( .await .map_err(ActionError::action_failed)?; - osagactx + // If the sled successfully unregistered the instance, allow the rest of + // saga unwind to restore the instance record to its prior state (without + // writing back the state returned from sled agent). Otherwise, try to + // reason about the next action from the specific kind of error that was + // returned. 
+ if let Err(e) = osagactx .nexus() - .instance_ensure_unregistered( - &opctx, - &authz_instance, - &sled_id, - db_instance.runtime(), - ) + .instance_ensure_unregistered(&opctx, &authz_instance, &sled_id) .await - .map_err(ActionError::action_failed)?; + { + error!(osagactx.log(), + "start saga: failed to unregister instance from sled"; + "instance_id" => %instance_id, + "error" => ?e); + + // If the failure came from talking to sled agent, and the error code + // indicates the instance or sled might be unhealthy, manual + // intervention is likely to be needed, so try to mark the instance as + // Failed and then bail on unwinding. + // + // If sled agent is in good shape but just doesn't know about the + // instance, this saga still owns the instance's state, so allow + // unwinding to continue. + // + // If some other Nexus error occurred, this saga is in bad shape, so + // return an error indicating that intervention is needed without trying + // to modify the instance further. + // + // TODO(#3238): `instance_unhealthy` does not take an especially nuanced + // view of the meanings of the error codes sled agent could return, so + // assuming that an error that isn't `instance_unhealthy` means + // that everything is hunky-dory and it's OK to continue unwinding may + // be a bit of a stretch. See the definition of `instance_unhealthy` for + // more details. + match e { + InstanceStateChangeError::SledAgent(inner) + if inner.instance_unhealthy() => + { + error!(osagactx.log(), + "start saga: failing instance after unregister failure"; + "instance_id" => %instance_id, + "error" => ?inner); + + if let Err(set_failed_error) = osagactx + .nexus() + .mark_instance_failed( + &instance_id, + db_instance.runtime(), + &inner, + ) + .await + { + error!(osagactx.log(), + "start saga: failed to mark instance as failed"; + "instance_id" => %instance_id, + "error" => ?set_failed_error); + + Err(set_failed_error.into()) + } else { + Err(inner.0.into()) + } + } + InstanceStateChangeError::SledAgent(_) => { + info!(osagactx.log(), + "start saga: instance already unregistered from sled"; + "instance_id" => %instance_id); - Ok(()) + Ok(()) + } + InstanceStateChangeError::Other(inner) => { + error!(osagactx.log(), + "start saga: internal error unregistering instance"; + "instance_id" => %instance_id, + "error" => ?inner); + + Err(inner.into()) + } + } + } else { + Ok(()) + } } async fn sis_ensure_running( @@ -615,7 +691,7 @@ async fn sis_ensure_running( .await .map_err(ActionError::action_failed)?; - osagactx + match osagactx .nexus() .instance_request_state( &opctx, @@ -625,9 +701,30 @@ async fn sis_ensure_running( crate::app::instance::InstanceStateChangeRequest::Run, ) .await - .map_err(ActionError::action_failed)?; + { + Ok(_) => Ok(()), + Err(InstanceStateChangeError::SledAgent(inner)) => { + info!(osagactx.log(), + "start saga: sled agent failed to set instance to running"; + "instance_id" => %instance_id, + "sled_id" => %sled_id, + "error" => ?inner); + + // Don't set the instance to Failed in this case. Instead, allow + // the saga to unwind and restore the instance to the Stopped + // state (matching what would happen if there were a failure + // prior to this point). 
+ Err(ActionError::action_failed(Error::from(inner))) + } + Err(InstanceStateChangeError::Other(inner)) => { + info!(osagactx.log(), + "start saga: internal error changing instance state"; + "instance_id" => %instance_id, + "error" => ?inner); - Ok(()) + Err(ActionError::action_failed(inner)) + } + } } #[cfg(test)] @@ -776,6 +873,9 @@ mod test { new_db_instance.runtime().nexus_state.0, InstanceState::Stopped ); + + assert!(test_helpers::no_virtual_provisioning_resource_records_exist(cptestctx).await); + assert!(test_helpers::no_virtual_provisioning_collection_records_using_instances(cptestctx).await); } }) }, @@ -818,4 +918,80 @@ mod test { assert_eq!(vmm_state, InstanceState::Running); } + + /// Tests that if a start saga unwinds because sled agent returned failure + /// from a call to ensure the instance was running, then the system returns + /// to the correct state. + /// + /// This is different from `test_action_failure_can_unwind` because that + /// test causes saga nodes to "fail" without actually executing anything, + /// whereas this test injects a failure into the normal operation of the + /// ensure-running node. + #[nexus_test(server = crate::Server)] + async fn test_ensure_running_unwind(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx().nexus; + let _project_id = setup_test_project(&client).await; + let opctx = test_helpers::test_opctx(cptestctx); + let instance = create_instance(client).await; + let db_instance = + test_helpers::instance_fetch(cptestctx, instance.identity.id) + .await + .instance() + .clone(); + + let params = Params { + serialized_authn: authn::saga::Serialized::for_opctx(&opctx), + db_instance, + }; + + let dag = create_saga_dag::(params).unwrap(); + + // The ensure_running node is last in the saga. This should be the node + // where the failure ultimately occurs. + let last_node_name = dag + .get_nodes() + .last() + .expect("saga should have at least one node") + .name() + .clone(); + + // Inject failure at the simulated sled agent level. This allows the + // ensure-running node to attempt to change the instance's state, but + // forces this operation to fail and produce whatever side effects + // result from that failure. 
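The hook used here is added to the simulated sled agent later in this patch; distilled, it is just an optional error stashed behind a lock and checked at the top of the operation. A simplified synchronous sketch of the pattern (the real implementation uses an async `tokio::sync::Mutex` and the Omicron `Error` type):

```rust
use std::sync::Mutex;

/// Simplified model of the simulated sled agent's failure-injection hook.
struct SimAgent {
    injected_error: Mutex<Option<String>>,
}

impl SimAgent {
    fn set_error(&self, error: Option<String>) {
        *self.injected_error.lock().unwrap() = error;
    }

    fn instance_ensure_state(&self) -> Result<(), String> {
        // Fail first if a test has injected an error.
        if let Some(e) = self.injected_error.lock().unwrap().as_ref() {
            return Err(e.clone());
        }
        Ok(()) // ...normal state-change simulation would go here.
    }
}

fn main() {
    let agent = SimAgent { injected_error: Mutex::new(None) };
    assert!(agent.instance_ensure_state().is_ok());
    agent.set_error(Some("injected by test".to_string()));
    assert!(agent.instance_ensure_state().is_err());
}
```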
+ let sled_agent = &cptestctx.sled_agent.sled_agent; + sled_agent + .set_instance_ensure_state_error(Some(Error::internal_error( + "injected by test_ensure_running_unwind", + ))) + .await; + + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + let saga_error = nexus + .run_saga_raw_result(saga) + .await + .expect("saga execution should have started") + .kind + .expect_err("saga should fail due to injected error"); + + assert_eq!(saga_error.error_node_name, last_node_name); + + let db_instance = + test_helpers::instance_fetch(cptestctx, instance.identity.id).await; + + assert_eq!( + db_instance.instance().runtime_state.nexus_state, + nexus_db_model::InstanceState(InstanceState::Stopped) + ); + assert!(db_instance.vmm().is_none()); + + assert!( + test_helpers::no_virtual_provisioning_resource_records_exist( + cptestctx + ) + .await + ); + assert!(test_helpers::no_virtual_provisioning_collection_records_using_instances(cptestctx).await); + } } diff --git a/nexus/src/app/sagas/test_helpers.rs b/nexus/src/app/sagas/test_helpers.rs index 3110bd318a..1b383d27bb 100644 --- a/nexus/src/app/sagas/test_helpers.rs +++ b/nexus/src/app/sagas/test_helpers.rs @@ -11,7 +11,9 @@ use crate::{ Nexus, }; use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; -use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; +use diesel::{ + BoolExpressionMethods, ExpressionMethods, QueryDsl, SelectableHelper, +}; use futures::future::BoxFuture; use nexus_db_queries::{ authz, @@ -186,6 +188,68 @@ pub async fn instance_fetch( db_state } +pub async fn no_virtual_provisioning_resource_records_exist( + cptestctx: &ControlPlaneTestContext, +) -> bool { + use nexus_db_queries::db::model::VirtualProvisioningResource; + use nexus_db_queries::db::schema::virtual_provisioning_resource::dsl; + + let datastore = cptestctx.server.apictx().nexus.datastore().clone(); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + datastore + .transaction_retry_wrapper("no_virtual_provisioning_resource_records_exist") + .transaction(&conn, |conn| async move { + conn + .batch_execute_async(nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL) + .await + .unwrap(); + + Ok( + dsl::virtual_provisioning_resource + .filter(dsl::resource_type.eq(nexus_db_queries::db::model::ResourceTypeProvisioned::Instance.to_string())) + .select(VirtualProvisioningResource::as_select()) + .get_results_async::(&conn) + .await + .unwrap() + .is_empty() + ) + }).await.unwrap() +} + +pub async fn no_virtual_provisioning_collection_records_using_instances( + cptestctx: &ControlPlaneTestContext, +) -> bool { + use nexus_db_queries::db::model::VirtualProvisioningCollection; + use nexus_db_queries::db::schema::virtual_provisioning_collection::dsl; + + let datastore = cptestctx.server.apictx().nexus.datastore().clone(); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + datastore + .transaction_retry_wrapper( + "no_virtual_provisioning_collection_records_using_instances", + ) + .transaction(&conn, |conn| async move { + conn.batch_execute_async( + nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, + ) + .await + .unwrap(); + Ok(dsl::virtual_provisioning_collection + .filter( + dsl::cpus_provisioned.ne(0).or(dsl::ram_provisioned.ne(0)), + ) + .select(VirtualProvisioningCollection::as_select()) + .get_results_async::(&conn) + .await + .unwrap() + .is_empty()) + }) + .await + .unwrap() +} + /// Tests that the saga described by `dag` succeeds if each of its nodes is /// repeated. 
/// diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index 8dae31863c..bbc3e440ab 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -217,6 +217,16 @@ impl SimCollection { } } + /// Forcibly removes the object `id` from the collection without simulating + /// any further state changes for it. + pub async fn sim_force_remove(&self, id: Uuid) { + let mut objects = self.objects.lock().await; + let object = objects.remove(&id).unwrap(); + if let Some(mut tx) = object.channel_tx { + tx.close_channel(); + } + } + /// Complete a desired asynchronous state transition for object `id`. /// This is invoked either by `sim_step()` (if the simulation mode is /// `SimMode::Auto`) or `instance_finish_transition` (if the simulation mode diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index c06ae96f2e..a16049dd2f 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -68,6 +68,8 @@ pub struct SledAgent { pub v2p_mappings: Mutex>>, mock_propolis: Mutex>, PropolisClient)>>, + + instance_ensure_state_error: Mutex>, } fn extract_targets_from_volume_construction_request( @@ -159,6 +161,7 @@ impl SledAgent { disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), + instance_ensure_state_error: Mutex::new(None), }) } @@ -343,15 +346,7 @@ impl SledAgent { updated_runtime: Some(instance.terminate()), }; - // Poke the now-destroyed instance to force it to be removed from the - // collection. - // - // TODO: In the real sled agent, this happens inline without publishing - // any other state changes, whereas this call causes any pending state - // changes to be published. This can be fixed by adding a simulated - // object collection function to forcibly remove an object from a - // collection. - self.instances.sim_poke(instance_id, PokeMode::Drain).await; + self.instances.sim_force_remove(instance_id).await; Ok(response) } @@ -361,6 +356,11 @@ impl SledAgent { instance_id: Uuid, state: InstanceStateRequested, ) -> Result { + if let Some(e) = self.instance_ensure_state_error.lock().await.as_ref() + { + return Err(e.clone()); + } + let current = match self.instances.sim_get_cloned_object(&instance_id).await { Ok(i) => i.current().clone(), @@ -416,6 +416,10 @@ impl SledAgent { Ok(InstancePutStateResponse { updated_runtime: Some(new_state) }) } + pub async fn set_instance_ensure_state_error(&self, error: Option) { + *self.instance_ensure_state_error.lock().await = error; + } + async fn detach_disks_from_instance( &self, instance_id: Uuid, From 1a7863c8edf69115926564f5e53e67169370c97d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 13 Dec 2023 05:25:09 +0000 Subject: [PATCH 096/186] Update taiki-e/install-action digest to 0f94aa2 (#4684) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`6ee6c3a` -> `0f94aa2`](https://togithub.com/taiki-e/install-action/compare/6ee6c3a...0f94aa2) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. 
♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index b16f1ca9d7..e70b959f8a 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@6ee6c3ab83eab434138dfa928d72abc7eae14793 # v2
+        uses: taiki-e/install-action@0f94aa2032e24d01f7ae1cc63f71b13418365efd # v2
         with:
           tool: cargo-hakari
       - name: Check workspace-hack Cargo.toml is up-to-date

From 877a886e3ce33e0b1bf93bf479887d9f9c25af2a Mon Sep 17 00:00:00 2001
From: Justin Bennett
Date: Wed, 13 Dec 2023 01:19:17 -0500
Subject: [PATCH 097/186] Add resource limits (#4605)

This PR introduces `quotas` as a concept in Nexus, allowing operators to
enforce virtual resource limits at the silo level.

The initial implementation will be limited to checks during instance start,
disk creation, and snapshot creation. We will _not_ be doing advanced quota
recalculation as system resources change. We will _not yet_ be enforcing
intelligent quota caps where the sum of all quotas must be less than the
theoretical available system virtual resources.

The implementation of this functionality is shaped by
[RFD-427](https://rfd.shared.oxide.computer/rfd/0427), but some desired
functionality will be deferred given time/complexity constraints. Longer
term I believe the shape of quotas, and perhaps even their relationship to
silos, may change. This PR implements a simplified version that closely
matches how the virtual resource provisioning tables are already built out.
I know there's some oddness around the shape of the quotas table, with it
not having its own ID and otherwise being mildly divergent from other
resources, but this was largely to ensure we could migrate to another
solution _and_ not overcomplicate the initial implementation.
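As a rough illustration of the intended operator flow (not code from this
change, and the numbers are made up): limits are adjusted after silo creation
via `PUT /v1/system/silos/{silo}/quotas`, whose body is the `SiloQuotasUpdate`
type added below; any omitted field leaves that limit unchanged. A minimal
sketch of building such an update with the new params types:

```rust
use nexus_types::external_api::params;
use omicron_common::api::external::ByteCount;

// Sketch: raise a silo's memory and storage limits while leaving the
// vCPU limit as-is (a `None` field means "do not update").
fn example_quota_update() -> params::SiloQuotasUpdate {
    params::SiloQuotasUpdate {
        cpus: None, // hypothetical: keep the existing CPU quota
        memory: Some(ByteCount::from_gibibytes_u32(128)),
        storage: Some(ByteCount::from_gibibytes_u32(1024)),
    }
}
```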
## TODO - [x] Add quota creation as a step of silo creation - [x] Add initialization checks in CTEs for instance create, etc to only proceed when quota unmet - [x] Wire up CTE sentinels in upstream callsites - [x] Add backfill migration for existing customers - [x] Add tests for quota enforcement - [x] Delete the quotas when the silo is deleted --------- Co-authored-by: Sean Klein --- common/src/api/external/mod.rs | 1 + end-to-end-tests/src/bin/bootstrap.rs | 17 +- end-to-end-tests/src/helpers/ctx.rs | 2 +- nexus/db-model/src/lib.rs | 2 + .../virtual_provisioning_collection_update.rs | 22 ++ nexus/db-model/src/quota.rs | 109 ++++++ nexus/db-model/src/schema.rs | 13 +- nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/db-queries/src/db/datastore/quota.rs | 127 +++++++ nexus/db-queries/src/db/datastore/rack.rs | 2 + nexus/db-queries/src/db/datastore/silo.rs | 46 ++- .../virtual_provisioning_collection.rs | 10 +- nexus/db-queries/src/db/fixed_data/silo.rs | 5 + .../virtual_provisioning_collection_update.rs | 228 ++++++++++++- nexus/src/app/external_endpoints.rs | 1 + nexus/src/app/mod.rs | 1 + nexus/src/app/quota.rs | 49 +++ nexus/src/app/rack.rs | 4 + nexus/src/external_api/http_entrypoints.rs | 92 +++++- nexus/test-utils/src/resource_helpers.rs | 1 + nexus/tests/integration_tests/certificates.rs | 10 + nexus/tests/integration_tests/endpoints.rs | 24 ++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/quotas.rs | 312 ++++++++++++++++++ nexus/tests/integration_tests/silos.rs | 7 + nexus/tests/output/nexus_tags.txt | 3 + nexus/types/src/external_api/params.rs | 54 +++ nexus/types/src/external_api/views.rs | 8 + openapi/nexus.json | 261 +++++++++++++++ schema/crdb/20.0.0/up01.sql | 8 + schema/crdb/20.0.0/up02.sql | 28 ++ schema/crdb/dbinit.sql | 11 +- 32 files changed, 1429 insertions(+), 31 deletions(-) create mode 100644 nexus/db-model/src/quota.rs create mode 100644 nexus/db-queries/src/db/datastore/quota.rs create mode 100644 nexus/src/app/quota.rs create mode 100644 nexus/tests/integration_tests/quotas.rs create mode 100644 schema/crdb/20.0.0/up01.sql create mode 100644 schema/crdb/20.0.0/up02.sql diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index a6d729593b..64a2e462ec 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -702,6 +702,7 @@ pub enum ResourceType { Silo, SiloUser, SiloGroup, + SiloQuotas, IdentityProvider, SamlIdentityProvider, SshKey, diff --git a/end-to-end-tests/src/bin/bootstrap.rs b/end-to-end-tests/src/bin/bootstrap.rs index 83a37b8c21..9ddd872bc2 100644 --- a/end-to-end-tests/src/bin/bootstrap.rs +++ b/end-to-end-tests/src/bin/bootstrap.rs @@ -4,11 +4,11 @@ use end_to_end_tests::helpers::{generate_name, get_system_ip_pool}; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ ByteCount, DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify, - DiskCreate, DiskSource, IpRange, Ipv4Range, + DiskCreate, DiskSource, IpRange, Ipv4Range, SiloQuotasUpdate, }; use oxide_client::{ ClientDisksExt, ClientHiddenExt, ClientProjectsExt, - ClientSystemNetworkingExt, + ClientSystemNetworkingExt, ClientSystemSilosExt, }; use serde::{de::DeserializeOwned, Deserialize}; use std::time::Duration; @@ -45,6 +45,19 @@ async fn main() -> Result<()> { .send() .await?; + // ===== SET UP QUOTAS ===== // + eprintln!("setting up quotas..."); + client + .silo_quotas_update() + .silo("recovery") + .body(SiloQuotasUpdate { + cpus: Some(16), + memory: 
Some(ByteCount(1024 * 1024 * 1024 * 10)), + storage: Some(ByteCount(1024 * 1024 * 1024 * 1024)), + }) + .send() + .await?; + // ===== ENSURE DATASETS ARE READY ===== // eprintln!("ensuring datasets are ready..."); let ctx = Context::from_client(client).await?; diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index 2c66bd4724..0132feafeb 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -78,7 +78,7 @@ fn rss_config() -> Result { let content = std::fs::read_to_string(&path).unwrap_or(RSS_CONFIG_STR.to_string()); toml::from_str(&content) - .with_context(|| format!("parsing config-rss as TOML")) + .with_context(|| "parsing config-rss as TOML".to_string()) } fn nexus_external_dns_name(config: &SetupServiceConfig) -> String { diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 43bf83fd34..908f6f2368 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -55,6 +55,7 @@ mod system_update; // for join-based marker trait generation. mod ipv4_nat_entry; pub mod queries; +mod quota; mod rack; mod region; mod region_snapshot; @@ -139,6 +140,7 @@ pub use physical_disk::*; pub use physical_disk_kind::*; pub use producer_endpoint::*; pub use project::*; +pub use quota::*; pub use rack::*; pub use region::*; pub use region_snapshot::*; diff --git a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs b/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs index 6c684016b4..124ffe4db6 100644 --- a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs @@ -8,6 +8,7 @@ //! for the construction of this query. use crate::schema::silo; +use crate::schema::silo_quotas; use crate::schema::virtual_provisioning_collection; table! { @@ -28,11 +29,32 @@ table! { } } +table! { + quotas (silo_id) { + silo_id -> Uuid, + cpus -> Int8, + memory -> Int8, + storage -> Int8, + } +} + +table! 
{ + silo_provisioned { + id -> Uuid, + virtual_disk_bytes_provisioned -> Int8, + cpus_provisioned -> Int8, + ram_provisioned -> Int8, + } +} + diesel::allow_tables_to_appear_in_same_query!(silo, parent_silo,); diesel::allow_tables_to_appear_in_same_query!( virtual_provisioning_collection, + silo_quotas, parent_silo, all_collections, do_update, + quotas, + silo_provisioned ); diff --git a/nexus/db-model/src/quota.rs b/nexus/db-model/src/quota.rs new file mode 100644 index 0000000000..70a8ffa1fd --- /dev/null +++ b/nexus/db-model/src/quota.rs @@ -0,0 +1,109 @@ +use super::ByteCount; +use crate::schema::silo_quotas; +use chrono::{DateTime, Utc}; +use nexus_types::external_api::{params, views}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive( + Queryable, + Insertable, + Debug, + Clone, + Selectable, + Serialize, + Deserialize, + AsChangeset, +)] +#[diesel(table_name = silo_quotas)] +pub struct SiloQuotas { + pub silo_id: Uuid, + pub time_created: DateTime, + pub time_modified: DateTime, + + /// The number of CPUs that this silo is allowed to use + pub cpus: i64, + + /// The amount of memory (in bytes) that this silo is allowed to use + #[diesel(column_name = memory_bytes)] + pub memory: ByteCount, + + /// The amount of storage (in bytes) that this silo is allowed to use + #[diesel(column_name = storage_bytes)] + pub storage: ByteCount, +} + +impl SiloQuotas { + pub fn new( + silo_id: Uuid, + cpus: i64, + memory: ByteCount, + storage: ByteCount, + ) -> Self { + Self { + silo_id, + time_created: Utc::now(), + time_modified: Utc::now(), + cpus, + memory, + storage, + } + } + + pub fn arbitrarily_high_default(silo_id: Uuid) -> Self { + let count = params::SiloQuotasCreate::arbitrarily_high_default(); + Self::new( + silo_id, + count.cpus, + count.memory.into(), + count.storage.into(), + ) + } +} + +impl From for views::SiloQuotas { + fn from(silo_quotas: SiloQuotas) -> Self { + Self { + silo_id: silo_quotas.silo_id, + cpus: silo_quotas.cpus, + memory: silo_quotas.memory.into(), + storage: silo_quotas.storage.into(), + } + } +} + +impl From for SiloQuotas { + fn from(silo_quotas: views::SiloQuotas) -> Self { + Self { + silo_id: silo_quotas.silo_id, + time_created: Utc::now(), + time_modified: Utc::now(), + cpus: silo_quotas.cpus, + memory: silo_quotas.memory.into(), + storage: silo_quotas.storage.into(), + } + } +} + +// Describes a set of updates for the [`SiloQuotas`] model. +#[derive(AsChangeset)] +#[diesel(table_name = silo_quotas)] +pub struct SiloQuotasUpdate { + pub cpus: Option, + #[diesel(column_name = memory_bytes)] + pub memory: Option, + #[diesel(column_name = storage_bytes)] + pub storage: Option, + pub time_modified: DateTime, +} + +impl From for SiloQuotasUpdate { + fn from(params: params::SiloQuotasUpdate) -> Self { + Self { + cpus: params.cpus, + memory: params.memory.map(|f| f.into()), + storage: params.storage.map(|f| f.into()), + time_modified: Utc::now(), + } + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 51501b4894..10fa8dcfac 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -409,6 +409,17 @@ table! { } } +table! { + silo_quotas(silo_id) { + silo_id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + cpus -> Int8, + memory_bytes -> Int8, + storage_bytes -> Int8, + } +} + table! { network_interface (id) { id -> Uuid, @@ -1322,7 +1333,7 @@ table! { /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(19, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(20, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 761c3f995f..1609fc7101 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -68,6 +68,7 @@ mod network_interface; mod oximeter; mod physical_disk; mod project; +mod quota; mod rack; mod region; mod region_snapshot; diff --git a/nexus/db-queries/src/db/datastore/quota.rs b/nexus/db-queries/src/db/datastore/quota.rs new file mode 100644 index 0000000000..2066781e6b --- /dev/null +++ b/nexus/db-queries/src/db/datastore/quota.rs @@ -0,0 +1,127 @@ +use super::DataStore; +use crate::authz; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::pagination::paginated; +use crate::db::pool::DbConnection; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use nexus_db_model::SiloQuotas; +use nexus_db_model::SiloQuotasUpdate; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::ResourceType; +use omicron_common::api::external::UpdateResult; +use uuid::Uuid; + +impl DataStore { + /// Creates new quotas for a silo. This is grouped with silo creation + /// and shouldn't be called outside of that flow. + /// + /// An authz check _cannot_ be performed here because the authz initialization + /// isn't complete and will lead to a db deadlock. + /// + /// See + pub async fn silo_quotas_create( + &self, + conn: &async_bb8_diesel::Connection, + authz_silo: &authz::Silo, + quotas: SiloQuotas, + ) -> Result<(), Error> { + let silo_id = authz_silo.id(); + use db::schema::silo_quotas::dsl; + + diesel::insert_into(dsl::silo_quotas) + .values(quotas) + .execute_async(conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::SiloQuotas, + &silo_id.to_string(), + ), + ) + }) + .map(|_| ()) + } + + pub async fn silo_quotas_delete( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + authz_silo: &authz::Silo, + ) -> DeleteResult { + // Given that the quotas right now are somewhat of an extension of the + // Silo we just check for delete permission on the silo itself. + opctx.authorize(authz::Action::Delete, authz_silo).await?; + + use db::schema::silo_quotas::dsl; + diesel::delete(dsl::silo_quotas) + .filter(dsl::silo_id.eq(authz_silo.id())) + .execute_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } + + pub async fn silo_update_quota( + &self, + opctx: &OpContext, + authz_silo: &authz::Silo, + updates: SiloQuotasUpdate, + ) -> UpdateResult { + opctx.authorize(authz::Action::Modify, authz_silo).await?; + use db::schema::silo_quotas::dsl; + let silo_id = authz_silo.id(); + diesel::update(dsl::silo_quotas) + .filter(dsl::silo_id.eq(silo_id)) + .set(updates) + .returning(SiloQuotas::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::SiloQuotas, + &silo_id.to_string(), + ), + ) + }) + } + + pub async fn silo_quotas_view( + &self, + opctx: &OpContext, + authz_silo: &authz::Silo, + ) -> Result { + opctx.authorize(authz::Action::Read, authz_silo).await?; + use db::schema::silo_quotas::dsl; + dsl::silo_quotas + .filter(dsl::silo_id.eq(authz_silo.id())) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn fleet_list_quotas( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + use db::schema::silo_quotas::dsl; + paginated(dsl::silo_quotas, dsl::silo_id, pagparams) + .select(SiloQuotas::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } +} diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index a69386cfd0..728da0b0d1 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -912,6 +912,8 @@ mod test { name: "test-silo".parse().unwrap(), description: String::new(), }, + // Set a default quota of a half rack's worth of resources + quotas: external_params::SiloQuotasCreate::arbitrarily_high_default(), discoverable: false, identity_mode: SiloIdentityMode::LocalOnly, admin_group_name: None, diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 437c171fb0..2c0c5f3c47 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -28,6 +28,7 @@ use chrono::Utc; use diesel::prelude::*; use nexus_db_model::Certificate; use nexus_db_model::ServiceKind; +use nexus_db_model::SiloQuotas; use nexus_types::external_api::params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::SiloRole; @@ -61,13 +62,32 @@ impl DataStore { debug!(opctx.log, "attempting to create built-in silos"); use db::schema::silo::dsl; - let count = diesel::insert_into(dsl::silo) - .values([&*DEFAULT_SILO, &*INTERNAL_SILO]) - .on_conflict(dsl::id) - .do_nothing() - .execute_async(&*self.pool_connection_authorized(opctx).await?) + use db::schema::silo_quotas::dsl as quotas_dsl; + let count = self + .pool_connection_authorized(opctx) + .await? 
+ .transaction_async(|conn| async move { + diesel::insert_into(quotas_dsl::silo_quotas) + .values(SiloQuotas::arbitrarily_high_default( + DEFAULT_SILO.id(), + )) + .on_conflict(quotas_dsl::silo_id) + .do_nothing() + .execute_async(&conn) + .await + .map_err(TransactionError::CustomError) + .unwrap(); + diesel::insert_into(dsl::silo) + .values([&*DEFAULT_SILO, &*INTERNAL_SILO]) + .on_conflict(dsl::id) + .do_nothing() + .execute_async(&conn) + .await + .map_err(TransactionError::CustomError) + }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .unwrap(); + info!(opctx.log, "created {} built-in silos", count); self.virtual_provisioning_collection_create( @@ -263,6 +283,18 @@ impl DataStore { self.dns_update(nexus_opctx, &conn, dns_update).await?; + self.silo_quotas_create( + &conn, + &authz_silo, + SiloQuotas::new( + authz_silo.id(), + new_silo_params.quotas.cpus, + new_silo_params.quotas.memory.into(), + new_silo_params.quotas.storage.into(), + ), + ) + .await?; + Ok::>(silo) }) .await?; @@ -380,6 +412,8 @@ impl DataStore { ))); } + self.silo_quotas_delete(opctx, &conn, &authz_silo).await?; + self.virtual_provisioning_collection_delete_on_connection( &opctx.log, &conn, id, ) diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index 230c3941ff..348d277ddf 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -195,7 +195,9 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + crate::db::queries::virtual_provisioning_collection_update::from_diesel(e) + })?; self.virtual_provisioning_collection_producer .append_disk_metrics(&provisions)?; Ok(provisions) @@ -249,7 +251,7 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| crate::db::queries::virtual_provisioning_collection_update::from_diesel(e))?; self.virtual_provisioning_collection_producer .append_disk_metrics(&provisions)?; Ok(provisions) @@ -270,7 +272,7 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| crate::db::queries::virtual_provisioning_collection_update::from_diesel(e))?; self.virtual_provisioning_collection_producer .append_cpu_metrics(&provisions)?; Ok(provisions) @@ -300,7 +302,7 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| crate::db::queries::virtual_provisioning_collection_update::from_diesel(e))?; self.virtual_provisioning_collection_producer .append_cpu_metrics(&provisions)?; Ok(provisions) diff --git a/nexus/db-queries/src/db/fixed_data/silo.rs b/nexus/db-queries/src/db/fixed_data/silo.rs index d32c4211e9..6eba849ee3 100644 --- a/nexus/db-queries/src/db/fixed_data/silo.rs +++ b/nexus/db-queries/src/db/fixed_data/silo.rs @@ -24,6 +24,9 @@ lazy_static! { name: "default-silo".parse().unwrap(), description: "default silo".to_string(), }, + // This quota is actually _unused_ because the default silo + // isn't constructed in the same way a normal silo would be. 
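+            // (Its effective quota is the arbitrarily-high default that
+            // `load_builtin_silos` inserts directly into `silo_quotas`.)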
+ quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -49,6 +52,8 @@ lazy_static! { name: "oxide-internal".parse().unwrap(), description: "Built-in internal Silo.".to_string(), }, + // The internal silo contains no virtual resources, so it has no allotted capacity. + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 0a383eb6f1..7672d5af9a 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -13,16 +13,69 @@ use crate::db::pool::DbConnection; use crate::db::schema::virtual_provisioning_collection; use crate::db::schema::virtual_provisioning_resource; use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery}; +use crate::db::true_or_cast_error::matches_sentinel; +use crate::db::true_or_cast_error::TrueOrCastError; use db_macros::Subquery; use diesel::pg::Pg; use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; +use diesel::result::Error as DieselError; use diesel::{ - sql_types, CombineDsl, ExpressionMethods, IntoSql, - NullableExpressionMethods, QueryDsl, RunQueryDsl, SelectableHelper, + sql_types, BoolExpressionMethods, CombineDsl, ExpressionMethods, IntoSql, + JoinOnDsl, NullableExpressionMethods, QueryDsl, RunQueryDsl, + SelectableHelper, }; use nexus_db_model::queries::virtual_provisioning_collection_update::{ - all_collections, do_update, parent_silo, + all_collections, do_update, parent_silo, quotas, silo_provisioned, }; +use omicron_common::api::external; +use omicron_common::api::external::MessagePair; + +const NOT_ENOUGH_CPUS_SENTINEL: &'static str = "Not enough cpus"; +const NOT_ENOUGH_MEMORY_SENTINEL: &'static str = "Not enough memory"; +const NOT_ENOUGH_STORAGE_SENTINEL: &'static str = "Not enough storage"; + +/// Translates a generic pool error to an external error based +/// on messages which may be emitted when provisioning virtual resources +/// such as instances and disks. +pub fn from_diesel(e: DieselError) -> external::Error { + use crate::db::error; + + let sentinels = [ + NOT_ENOUGH_CPUS_SENTINEL, + NOT_ENOUGH_MEMORY_SENTINEL, + NOT_ENOUGH_STORAGE_SENTINEL, + ]; + if let Some(sentinel) = matches_sentinel(&e, &sentinels) { + match sentinel { + NOT_ENOUGH_CPUS_SENTINEL => { + return external::Error::InsufficientCapacity { + message: MessagePair::new_full( + "vCPU Limit Exceeded: Not enough vCPUs to complete request. Either stop unused instances to free up resources or contact the rack operator to request a capacity increase.".to_string(), + "User tried to allocate an instance but the virtual provisioning resource table indicated that there were not enough CPUs available to satisfy the request.".to_string(), + ) + } + } + NOT_ENOUGH_MEMORY_SENTINEL => { + return external::Error::InsufficientCapacity { + message: MessagePair::new_full( + "Memory Limit Exceeded: Not enough memory to complete request. 
Either stop unused instances to free up resources or contact the rack operator to request a capacity increase.".to_string(), + "User tried to allocate an instance but the virtual provisioning resource table indicated that there were not enough RAM available to satisfy the request.".to_string(), + ) + } + } + NOT_ENOUGH_STORAGE_SENTINEL => { + return external::Error::InsufficientCapacity { + message: MessagePair::new_full( + "Storage Limit Exceeded: Not enough storage to complete request. Either remove unneeded disks and snapshots to free up resources or contact the rack operator to request a capacity increase.".to_string(), + "User tried to allocate a disk or snapshot but the virtual provisioning resource table indicated that there were not enough storage available to satisfy the request.".to_string(), + ) + } + } + _ => {} + } + } + error::public_error_from_diesel(e, error::ErrorHandler::Server) +} #[derive(Subquery, QueryId)] #[subquery(name = parent_silo)] @@ -82,20 +135,86 @@ struct DoUpdate { } impl DoUpdate { - fn new_for_insert(id: uuid::Uuid) -> Self { + fn new_for_insert( + silo_provisioned: &SiloProvisioned, + quotas: &Quotas, + resource: VirtualProvisioningResource, + ) -> Self { use virtual_provisioning_resource::dsl; + let cpus_provisioned_delta = + resource.cpus_provisioned.into_sql::(); + let memory_provisioned_delta = + i64::from(resource.ram_provisioned).into_sql::(); + let storage_provisioned_delta = + i64::from(resource.virtual_disk_bytes_provisioned) + .into_sql::(); + let not_allocted = dsl::virtual_provisioning_resource - .find(id) + .find(resource.id) .count() .single_value() .assume_not_null() .eq(0); + let has_sufficient_cpus = quotas + .query_source() + .select(quotas::cpus) + .single_value() + .assume_not_null() + .ge(silo_provisioned + .query_source() + .select(silo_provisioned::cpus_provisioned) + .single_value() + .assume_not_null() + + cpus_provisioned_delta); + + let has_sufficient_memory = quotas + .query_source() + .select(quotas::memory) + .single_value() + .assume_not_null() + .ge(silo_provisioned + .query_source() + .select(silo_provisioned::ram_provisioned) + .single_value() + .assume_not_null() + + memory_provisioned_delta); + + let has_sufficient_storage = quotas + .query_source() + .select(quotas::storage) + .single_value() + .assume_not_null() + .ge(silo_provisioned + .query_source() + .select(silo_provisioned::virtual_disk_bytes_provisioned) + .single_value() + .assume_not_null() + + storage_provisioned_delta); + Self { query: Box::new(diesel::select((ExpressionAlias::new::< do_update::update, - >(not_allocted),))), + >( + not_allocted + .and(TrueOrCastError::new( + cpus_provisioned_delta.eq(0).or(has_sufficient_cpus), + NOT_ENOUGH_CPUS_SENTINEL, + )) + .and(TrueOrCastError::new( + memory_provisioned_delta + .eq(0) + .or(has_sufficient_memory), + NOT_ENOUGH_MEMORY_SENTINEL, + )) + .and(TrueOrCastError::new( + storage_provisioned_delta + .eq(0) + .or(has_sufficient_storage), + NOT_ENOUGH_STORAGE_SENTINEL, + )), + ),))), } } @@ -161,6 +280,67 @@ impl UpdatedProvisions { } } +#[derive(Subquery, QueryId)] +#[subquery(name = quotas)] +struct Quotas { + query: Box>, +} + +impl Quotas { + // TODO: We could potentially skip this in cases where we know we're removing a resource instead of inserting + fn new(parent_silo: &ParentSilo) -> Self { + use crate::db::schema::silo_quotas::dsl; + Self { + query: Box::new( + dsl::silo_quotas + .inner_join( + parent_silo + .query_source() + .on(dsl::silo_id.eq(parent_silo::id)), + ) + .select(( + dsl::silo_id, + 
dsl::cpus, + ExpressionAlias::new::( + dsl::memory_bytes, + ), + ExpressionAlias::new::( + dsl::storage_bytes, + ), + )), + ), + } + } +} + +#[derive(Subquery, QueryId)] +#[subquery(name = silo_provisioned)] +struct SiloProvisioned { + query: Box>, +} + +impl SiloProvisioned { + fn new(parent_silo: &ParentSilo) -> Self { + use virtual_provisioning_collection::dsl; + Self { + query: Box::new( + dsl::virtual_provisioning_collection + .inner_join( + parent_silo + .query_source() + .on(dsl::id.eq(parent_silo::id)), + ) + .select(( + dsl::id, + dsl::cpus_provisioned, + dsl::ram_provisioned, + dsl::virtual_disk_bytes_provisioned, + )), + ), + } + } +} + // This structure wraps a query, such that it can be used within a CTE. // // It generates a name that can be used by the "CteBuilder", but does not @@ -195,6 +375,15 @@ where } } +/// The virtual resource collection is only updated when a resource is inserted +/// or deleted from the resource provisioning table. By probing for the presence +/// or absence of a resource, we can update collections at the same time as we +/// create or destroy the resource, which helps make the operation idempotent. +enum UpdateKind { + Insert(VirtualProvisioningResource), + Delete(uuid::Uuid), +} + /// Constructs a CTE for updating resource provisioning information in all /// collections for a particular object. #[derive(QueryId)] @@ -220,7 +409,7 @@ impl VirtualProvisioningCollectionUpdate { // - values: The updated values to propagate through collections (iff // "do_update" evaluates to "true"). fn apply_update( - do_update: DoUpdate, + update_kind: UpdateKind, update: U, project_id: uuid::Uuid, values: V, @@ -237,6 +426,17 @@ impl VirtualProvisioningCollectionUpdate { &parent_silo, *crate::db::fixed_data::FLEET_ID, ); + + let quotas = Quotas::new(&parent_silo); + let silo_provisioned = SiloProvisioned::new(&parent_silo); + + let do_update = match update_kind { + UpdateKind::Insert(resource) => { + DoUpdate::new_for_insert(&silo_provisioned, "as, resource) + } + UpdateKind::Delete(id) => DoUpdate::new_for_delete(id), + }; + let updated_collections = UpdatedProvisions::new(&all_collections, &do_update, values); @@ -251,6 +451,8 @@ impl VirtualProvisioningCollectionUpdate { let cte = CteBuilder::new() .add_subquery(parent_silo) .add_subquery(all_collections) + .add_subquery(quotas) + .add_subquery(silo_provisioned) .add_subquery(do_update) .add_subquery(update) .add_subquery(updated_collections) @@ -273,8 +475,7 @@ impl VirtualProvisioningCollectionUpdate { provision.virtual_disk_bytes_provisioned = disk_byte_diff; Self::apply_update( - // We should insert the record if it does not already exist. - DoUpdate::new_for_insert(id), + UpdateKind::Insert(provision.clone()), // The query to actually insert the record. UnreferenceableSubquery( diesel::insert_into( @@ -305,8 +506,7 @@ impl VirtualProvisioningCollectionUpdate { use virtual_provisioning_resource::dsl as resource_dsl; Self::apply_update( - // We should delete the record if it exists. - DoUpdate::new_for_delete(id), + UpdateKind::Delete(id), // The query to actually delete the record. UnreferenceableSubquery( diesel::delete(resource_dsl::virtual_provisioning_resource) @@ -342,8 +542,7 @@ impl VirtualProvisioningCollectionUpdate { provision.ram_provisioned = ram_diff; Self::apply_update( - // We should insert the record if it does not already exist. - DoUpdate::new_for_insert(id), + UpdateKind::Insert(provision.clone()), // The query to actually insert the record. 
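             // (`provision` is cloned above because the quota-check CTE arm
             // also reads its deltas in `DoUpdate::new_for_insert`.)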
UnreferenceableSubquery( diesel::insert_into( @@ -378,8 +577,7 @@ impl VirtualProvisioningCollectionUpdate { use virtual_provisioning_resource::dsl as resource_dsl; Self::apply_update( - // We should delete the record if it exists. - DoUpdate::new_for_delete(id), + UpdateKind::Delete(id), // The query to actually delete the record. // // The filter condition here ensures that the provisioning record is diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index 1ab33c5c9c..0a6dd41db6 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -827,6 +827,7 @@ mod test { name: name.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode, admin_group_name: None, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index d4c2d596f8..b92714a365 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -51,6 +51,7 @@ mod metrics; mod network_interface; mod oximeter; mod project; +mod quota; mod rack; pub(crate) mod saga; mod session; diff --git a/nexus/src/app/quota.rs b/nexus/src/app/quota.rs new file mode 100644 index 0000000000..f59069a9ab --- /dev/null +++ b/nexus/src/app/quota.rs @@ -0,0 +1,49 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Resource limits and system quotas + +use nexus_db_queries::authz; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db; +use nexus_db_queries::db::lookup; +use nexus_types::external_api::params; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::UpdateResult; +use uuid::Uuid; + +impl super::Nexus { + pub async fn silo_quotas_view( + &self, + opctx: &OpContext, + silo_lookup: &lookup::Silo<'_>, + ) -> Result { + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Read).await?; + self.db_datastore.silo_quotas_view(opctx, &authz_silo).await + } + + pub(crate) async fn fleet_list_quotas( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + self.db_datastore.fleet_list_quotas(opctx, pagparams).await + } + + pub(crate) async fn silo_update_quota( + &self, + opctx: &OpContext, + silo_lookup: &lookup::Silo<'_>, + updates: ¶ms::SiloQuotasUpdate, + ) -> UpdateResult { + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Modify).await?; + self.db_datastore + .silo_update_quota(opctx, &authz_silo, updates.clone().into()) + .await + } +} diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 1643ac301d..168e9eeaa3 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -203,6 +203,10 @@ impl super::Nexus { name: request.recovery_silo.silo_name, description: "built-in recovery Silo".to_string(), }, + // The recovery silo is initialized with no allocated capacity given it's + // not intended to be used to deploy workloads. Operators can add capacity + // after the fact if they want to use it for that purpose. 
+ quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: SiloIdentityMode::LocalOnly, admin_group_name: None, diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a6fd7a3ccb..6720f95c39 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -45,7 +45,7 @@ use nexus_db_queries::db::model::Name; use nexus_db_queries::{ authz::ApiResource, db::fixed_data::silo::INTERNAL_SILO_ID, }; -use nexus_types::external_api::params::ProjectSelector; +use nexus_types::external_api::{params::ProjectSelector, views::SiloQuotas}; use nexus_types::{ external_api::views::{SledInstance, Switch}, identity::AssetIdentityMetadata, @@ -280,6 +280,11 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(silo_policy_view)?; api.register(silo_policy_update)?; + api.register(system_quotas_list)?; + + api.register(silo_quotas_view)?; + api.register(silo_quotas_update)?; + api.register(silo_identity_provider_list)?; api.register(saml_identity_provider_create)?; @@ -510,6 +515,91 @@ async fn policy_update( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Lists resource quotas for all silos +#[endpoint { + method = GET, + path = "/v1/system/silo-quotas", + tags = ["system/silos"], +}] +async fn system_quotas_list( + rqctx: RequestContext>, + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let quotas = nexus + .fleet_list_quotas(&opctx, &pagparams) + .await? + .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + quotas, + &|_, quota: &SiloQuotas| quota.silo_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// View the resource quotas of a given silo +#[endpoint { + method = GET, + path = "/v1/system/silos/{silo}/quotas", + tags = ["system/silos"], +}] +async fn silo_quotas_view( + rqctx: RequestContext>, + path_params: Path, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = + nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; + let quota = nexus.silo_quotas_view(&opctx, &silo_lookup).await?; + Ok(HttpResponseOk(quota.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Update the resource quotas of a given silo +/// +/// If a quota value is not specified, it will remain unchanged. 
+#[endpoint { + method = PUT, + path = "/v1/system/silos/{silo}/quotas", + tags = ["system/silos"], +}] +async fn silo_quotas_update( + rqctx: RequestContext>, + path_params: Path, + new_quota: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = + nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; + let quota = nexus + .silo_update_quota(&opctx, &silo_lookup, &new_quota.into_inner()) + .await?; + Ok(HttpResponseOk(quota.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// List silos /// /// Lists silos that are discoverable based on the current permissions. diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 1848989bf9..0527d99490 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -287,6 +287,7 @@ pub async fn create_silo( name: silo_name.parse().unwrap(), description: "a silo".to_string(), }, + quotas: params::SiloQuotasCreate::arbitrarily_high_default(), discoverable, identity_mode, admin_group_name: None, diff --git a/nexus/tests/integration_tests/certificates.rs b/nexus/tests/integration_tests/certificates.rs index 1843fc28c8..5a34caab49 100644 --- a/nexus/tests/integration_tests/certificates.rs +++ b/nexus/tests/integration_tests/certificates.rs @@ -394,6 +394,11 @@ async fn test_silo_certificates() { .name(silo2.silo_name.clone()) .description("") .discoverable(false) + .quotas(oxide_client::types::SiloQuotasCreate { + cpus: 0, + memory: oxide_client::types::ByteCount(0), + storage: oxide_client::types::ByteCount(0), + }) .identity_mode(oxide_client::types::SiloIdentityMode::LocalOnly) .tls_certificates(vec![silo2_cert.try_into().unwrap()]), ) @@ -454,6 +459,11 @@ async fn test_silo_certificates() { .name(silo3.silo_name.clone()) .description("") .discoverable(false) + .quotas(oxide_client::types::SiloQuotasCreate { + cpus: 0, + memory: oxide_client::types::ByteCount(0), + storage: oxide_client::types::ByteCount(0), + }) .identity_mode(oxide_client::types::SiloIdentityMode::LocalOnly) .tls_certificates(vec![silo3_cert.try_into().unwrap()]), ) diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index e11902d0fe..bd6df210c0 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -85,12 +85,15 @@ lazy_static! { format!("/v1/system/silos/{}", *DEMO_SILO_NAME); pub static ref DEMO_SILO_POLICY_URL: String = format!("/v1/system/silos/{}/policy", *DEMO_SILO_NAME); + pub static ref DEMO_SILO_QUOTAS_URL: String = + format!("/v1/system/silos/{}/quotas", *DEMO_SILO_NAME); pub static ref DEMO_SILO_CREATE: params::SiloCreate = params::SiloCreate { identity: IdentityMetadataCreateParams { name: DEMO_SILO_NAME.clone(), description: String::from(""), }, + quotas: params::SiloQuotasCreate::arbitrarily_high_default(), discoverable: true, identity_mode: shared::SiloIdentityMode::SamlJit, admin_group_name: None, @@ -950,6 +953,27 @@ lazy_static! 
{ ), ], }, + VerifyEndpoint { + url: &DEMO_SILO_QUOTAS_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get, + AllowedMethod::Put( + serde_json::to_value( + params::SiloQuotasCreate::empty() + ).unwrap() + ) + ], + }, + VerifyEndpoint { + url: "/v1/system/silo-quotas", + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get + ], + }, VerifyEndpoint { url: "/v1/policy", visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 53de24c518..35c70bf874 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -24,6 +24,7 @@ mod oximeter; mod pantry; mod password_login; mod projects; +mod quotas; mod rack; mod role_assignments; mod roles_builtin; diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs new file mode 100644 index 0000000000..2fddf4e05c --- /dev/null +++ b/nexus/tests/integration_tests/quotas.rs @@ -0,0 +1,312 @@ +use anyhow::Error; +use dropshot::test_util::ClientTestContext; +use dropshot::HttpErrorResponseBody; +use http::Method; +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::http_testing::TestResponse; +use nexus_test_utils::resource_helpers::create_local_user; +use nexus_test_utils::resource_helpers::grant_iam; +use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::populate_ip_pool; +use nexus_test_utils::resource_helpers::DiskTest; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params; +use nexus_types::external_api::shared; +use nexus_types::external_api::shared::SiloRole; +use nexus_types::external_api::views::SiloQuotas; +use omicron_common::api::external::ByteCount; +use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_common::api::external::InstanceCpuCount; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +struct ResourceAllocator { + auth: AuthnMode, +} + +impl ResourceAllocator { + fn new(auth: AuthnMode) -> Self { + Self { auth } + } + + async fn set_quotas( + &self, + client: &ClientTestContext, + quotas: params::SiloQuotasUpdate, + ) -> Result { + NexusRequest::object_put( + client, + "/v1/system/silos/quota-test-silo/quotas", + Some("as), + ) + .authn_as(self.auth.clone()) + .execute() + .await + } + + async fn get_quotas(&self, client: &ClientTestContext) -> SiloQuotas { + NexusRequest::object_get( + client, + "/v1/system/silos/quota-test-silo/quotas", + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("failed to fetch quotas") + .parsed_body() + .expect("failed to parse quotas") + } + + async fn provision_instance( + &self, + client: &ClientTestContext, + name: &str, + cpus: u16, + memory: u32, + ) -> Result { + NexusRequest::objects_post( + client, + "/v1/instances?project=project", + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "".into(), + }, + ncpus: InstanceCpuCount(cpus), + memory: ByteCount::from_gibibytes_u32(memory), + hostname: "host".to_string(), + user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" + .to_vec(), + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: 
Vec::::new(), + disks: Vec::::new(), + start: false, + }, + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("Instance should be created regardless of quotas"); + + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + format!("/v1/instances/{}/start?project=project", name) + .as_str(), + ) + .body(None as Option<&serde_json::Value>), + ) + .authn_as(self.auth.clone()) + .execute() + .await + } + + async fn cleanup_instance( + &self, + client: &ClientTestContext, + name: &str, + ) -> TestResponse { + // Try to stop the instance + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + format!("/v1/instances/{}/stop?project=project", name).as_str(), + ) + .body(None as Option<&serde_json::Value>), + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("failed to stop instance"); + + NexusRequest::object_delete( + client, + format!("/v1/instances/{}?project=project", name).as_str(), + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("failed to delete instance") + } + + async fn provision_disk( + &self, + client: &ClientTestContext, + name: &str, + size: u32, + ) -> Result { + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + "/v1/disks?project=project", + ) + .body(Some(¶ms::DiskCreate { + identity: IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "".into(), + }, + size: ByteCount::from_gibibytes_u32(size), + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + })), + ) + .authn_as(self.auth.clone()) + .execute() + .await + } +} + +async fn setup_silo_with_quota( + client: &ClientTestContext, + silo_name: &str, + quotas: params::SiloQuotasCreate, +) -> ResourceAllocator { + let silo = object_create( + client, + "/v1/system/silos", + ¶ms::SiloCreate { + identity: IdentityMetadataCreateParams { + name: silo_name.parse().unwrap(), + description: "".into(), + }, + quotas, + discoverable: true, + identity_mode: shared::SiloIdentityMode::LocalOnly, + admin_group_name: None, + tls_certificates: vec![], + mapped_fleet_roles: Default::default(), + }, + ) + .await; + + populate_ip_pool(&client, "default", None).await; + + // Create a silo user + let user = create_local_user( + client, + &silo, + &"user".parse().unwrap(), + params::UserPassword::LoginDisallowed, + ) + .await; + + // Make silo admin + grant_iam( + client, + format!("/v1/system/silos/{}", silo_name).as_str(), + SiloRole::Admin, + user.id, + AuthnMode::PrivilegedUser, + ) + .await; + + let auth_mode = AuthnMode::SiloUser(user.id); + + NexusRequest::objects_post( + client, + "/v1/projects", + ¶ms::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: "".into(), + }, + }, + ) + .authn_as(auth_mode.clone()) + .execute() + .await + .unwrap(); + + ResourceAllocator::new(auth_mode) +} + +#[nexus_test] +async fn test_quotas(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + // Simulate space for disks + DiskTest::new(&cptestctx).await; + + let system = setup_silo_with_quota( + &client, + "quota-test-silo", + params::SiloQuotasCreate::empty(), + ) + .await; + + // Ensure trying to provision an instance with empty quotas fails + let err = system + .provision_instance(client, "instance", 1, 1) + .await + .unwrap() + .parsed_body::() + .expect("failed to parse error body"); + assert!( + err.message.contains("vCPU Limit Exceeded"), + "Unexpected error: {0}", + err.message + ); + 
system.cleanup_instance(client, "instance").await; + + // Up the CPU, memory quotas + system + .set_quotas( + client, + params::SiloQuotasUpdate { + cpus: Some(4), + memory: Some(ByteCount::from_gibibytes_u32(15)), + storage: Some(ByteCount::from_gibibytes_u32(2)), + }, + ) + .await + .expect("failed to set quotas"); + + let quotas = system.get_quotas(client).await; + assert_eq!(quotas.cpus, 4); + assert_eq!(quotas.memory, ByteCount::from_gibibytes_u32(15)); + assert_eq!(quotas.storage, ByteCount::from_gibibytes_u32(2)); + + // Ensure memory quota is enforced + let err = system + .provision_instance(client, "instance", 1, 16) + .await + .unwrap() + .parsed_body::() + .expect("failed to parse error body"); + assert!( + err.message.contains("Memory Limit Exceeded"), + "Unexpected error: {0}", + err.message + ); + system.cleanup_instance(client, "instance").await; + + // Allocating instance should now succeed + system + .provision_instance(client, "instance", 2, 10) + .await + .expect("Instance should've had enough resources to be provisioned"); + + let err = system + .provision_disk(client, "disk", 3) + .await + .unwrap() + .parsed_body::() + .expect("failed to parse error body"); + assert!( + err.message.contains("Storage Limit Exceeded"), + "Unexpected error: {0}", + err.message + ); + + system + .provision_disk(client, "disk", 1) + .await + .expect("Disk should be provisioned"); +} diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index 3c69c8b7cd..a5d4b47eaa 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -68,6 +68,7 @@ async fn test_silos(cptestctx: &ControlPlaneTestContext) { name: cptestctx.silo_name.clone(), description: "a silo".to_string(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -284,6 +285,7 @@ async fn test_silo_admin_group(cptestctx: &ControlPlaneTestContext) { name: "silo-name".parse().unwrap(), description: "a silo".to_string(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::SamlJit, admin_group_name: Some("administrator".into()), @@ -2256,6 +2258,7 @@ async fn test_silo_authn_policy(cptestctx: &ControlPlaneTestContext) { name: silo_name, description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2332,6 +2335,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2360,6 +2364,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2387,6 +2392,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2419,6 +2425,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, 
            admin_group_name: None,
diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt
index 5a4a61132e..3f77f4cb26 100644
--- a/nexus/tests/output/nexus_tags.txt
+++ b/nexus/tests/output/nexus_tags.txt
@@ -183,9 +183,12 @@ silo_identity_provider_list             GET      /v1/system/identity-providers
 silo_list                               GET      /v1/system/silos
 silo_policy_update                      PUT      /v1/system/silos/{silo}/policy
 silo_policy_view                        GET      /v1/system/silos/{silo}/policy
+silo_quotas_update                      PUT      /v1/system/silos/{silo}/quotas
+silo_quotas_view                        GET      /v1/system/silos/{silo}/quotas
 silo_user_list                          GET      /v1/system/users
 silo_user_view                          GET      /v1/system/users/{user_id}
 silo_view                               GET      /v1/system/silos/{silo}
+system_quotas_list                      GET      /v1/system/silo-quotas
 user_builtin_list                       GET      /v1/system/users-builtin
 user_builtin_view                       GET      /v1/system/users-builtin/{user}
diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs
index cde448c5b7..f27a6619e2 100644
--- a/nexus/types/src/external_api/params.rs
+++ b/nexus/types/src/external_api/params.rs
@@ -288,6 +288,12 @@ pub struct SiloCreate {
     /// endpoints. These should be valid for the Silo's DNS name(s).
     pub tls_certificates: Vec<CertificateCreate>,

+    /// Limits the amount of provisionable CPU, memory, and storage in the Silo.
+    /// CPU and memory are only consumed by running instances, while storage is
+    /// consumed by any disk or snapshot. A value of 0 means that resource is
+    /// *not* provisionable.
+    pub quotas: SiloQuotasCreate,
+
     /// Mapping of which Fleet roles are conferred by each Silo role
     ///
     /// The default is that no Fleet roles are conferred by any Silo roles
@@ -297,6 +303,54 @@ pub struct SiloCreate {
         BTreeMap<SiloRole, BTreeSet<FleetRole>>,
 }

+/// The amount of provisionable resources for a Silo
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct SiloQuotasCreate {
+    /// The amount of virtual CPUs available for running instances in the Silo
+    pub cpus: i64,
+    /// The amount of RAM (in bytes) available for running instances in the Silo
+    pub memory: ByteCount,
+    /// The amount of storage (in bytes) available for disks or snapshots
+    pub storage: ByteCount,
+}
+
+impl SiloQuotasCreate {
+    /// All quotas set to 0
+    pub fn empty() -> Self {
+        Self {
+            cpus: 0,
+            memory: ByteCount::from(0),
+            storage: ByteCount::from(0),
+        }
+    }
+
+    /// An arbitrarily high but identifiable default for quotas
+    /// that can be used for creating a Silo for testing
+    ///
+    /// The only customer-visible silo that this should be set on is the
+    /// default silo. Ultimately the default silo should only be initialized
+    /// with an empty quota, but as tests currently rely on it having a
+    /// quota, we need to set something.
+    pub fn arbitrarily_high_default() -> Self {
+        Self {
+            cpus: 9999999999,
+            memory: ByteCount::try_from(999999999999999999_u64).unwrap(),
+            storage: ByteCount::try_from(999999999999999999_u64).unwrap(),
+        }
+    }
+}
+
+/// Updateable properties of a Silo's resource limits.
+/// If a value is omitted it will not be updated.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SiloQuotasUpdate { + /// The amount of virtual CPUs available for running instances in the Silo + pub cpus: Option, + /// The amount of RAM (in bytes) available for running instances in the Silo + pub memory: Option, + /// The amount of storage (in bytes) available for disks or snapshots + pub storage: Option, +} + /// Create-time parameters for a `User` #[derive(Clone, Deserialize, Serialize, JsonSchema)] pub struct UserCreate { diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index af17e7e840..ecd459594a 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -49,6 +49,14 @@ pub struct Silo { BTreeMap>, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SiloQuotas { + pub silo_id: Uuid, + pub cpus: i64, + pub memory: ByteCount, + pub storage: ByteCount, +} + // IDENTITY PROVIDER #[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] diff --git a/openapi/nexus.json b/openapi/nexus.json index 7afb6cdc2f..2ddd5f0e94 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -6210,6 +6210,65 @@ } } }, + "/v1/system/silo-quotas": { + "get": { + "tags": [ + "system/silos" + ], + "summary": "Lists resource quotas for all silos", + "operationId": "system_quotas_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloQuotasResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/v1/system/silos": { "get": { "tags": [ @@ -6458,6 +6517,91 @@ } } }, + "/v1/system/silos/{silo}/quotas": { + "get": { + "tags": [ + "system/silos" + ], + "summary": "View the resource quotas of a given silo", + "operationId": "silo_quotas_view", + "parameters": [ + { + "in": "path", + "name": "silo", + "description": "Name or ID of the silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloQuotas" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "tags": [ + "system/silos" + ], + "summary": "Update the resource quotas of a given silo", + "description": "If a quota value is not specified, it will remain unchanged.", + "operationId": "silo_quotas_update", + "parameters": [ + { + "in": "path", + "name": "silo", + "description": "Name or ID of the silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/SiloQuotasUpdate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloQuotas" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/users": { "get": { "tags": [ @@ -13206,6 +13350,14 @@ "name": { "$ref": "#/components/schemas/Name" }, + "quotas": { + "description": "Limits the amount of provisionable CPU, memory, and storage in the Silo. CPU and memory are only consumed by running instances, while storage is consumed by any disk or snapshot. A value of 0 means that resource is *not* provisionable.", + "allOf": [ + { + "$ref": "#/components/schemas/SiloQuotasCreate" + } + ] + }, "tls_certificates": { "description": "Initial TLS certificates to be used for the new Silo's console and API endpoints. These should be valid for the Silo's DNS name(s).", "type": "array", @@ -13219,6 +13371,7 @@ "discoverable", "identity_mode", "name", + "quotas", "tls_certificates" ] }, @@ -13241,6 +13394,114 @@ } ] }, + "SiloQuotas": { + "type": "object", + "properties": { + "cpus": { + "type": "integer", + "format": "int64" + }, + "memory": { + "$ref": "#/components/schemas/ByteCount" + }, + "silo_id": { + "type": "string", + "format": "uuid" + }, + "storage": { + "$ref": "#/components/schemas/ByteCount" + } + }, + "required": [ + "cpus", + "memory", + "silo_id", + "storage" + ] + }, + "SiloQuotasCreate": { + "description": "The amount of provisionable resources for a Silo", + "type": "object", + "properties": { + "cpus": { + "description": "The amount of virtual CPUs available for running instances in the Silo", + "type": "integer", + "format": "int64" + }, + "memory": { + "description": "The amount of RAM (in bytes) available for running instances in the Silo", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "storage": { + "description": "The amount of storage (in bytes) available for disks or snapshots", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + } + }, + "required": [ + "cpus", + "memory", + "storage" + ] + }, + "SiloQuotasResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/SiloQuotas" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "SiloQuotasUpdate": { + "description": "Updateable properties of a Silo's resource limits. 
If a value is omitted it will not be updated.", + "type": "object", + "properties": { + "cpus": { + "nullable": true, + "description": "The amount of virtual CPUs available for running instances in the Silo", + "type": "integer", + "format": "int64" + }, + "memory": { + "nullable": true, + "description": "The amount of RAM (in bytes) available for running instances in the Silo", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "storage": { + "nullable": true, + "description": "The amount of storage (in bytes) available for disks or snapshots", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + } + } + }, "SiloResultsPage": { "description": "A single page of results", "type": "object", diff --git a/schema/crdb/20.0.0/up01.sql b/schema/crdb/20.0.0/up01.sql new file mode 100644 index 0000000000..6a95c41e48 --- /dev/null +++ b/schema/crdb/20.0.0/up01.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS omicron.public.silo_quotas ( + silo_id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + cpus INT8 NOT NULL, + memory_bytes INT8 NOT NULL, + storage_bytes INT8 NOT NULL +); \ No newline at end of file diff --git a/schema/crdb/20.0.0/up02.sql b/schema/crdb/20.0.0/up02.sql new file mode 100644 index 0000000000..2909e379ca --- /dev/null +++ b/schema/crdb/20.0.0/up02.sql @@ -0,0 +1,28 @@ +set + local disallow_full_table_scans = off; + +-- Adds quotas for any existing silos without them. +-- The selected quotas are based on the resources of a half rack +-- with 30% CPU and memory reserved for internal use and a 3.5x tax +-- on storage for replication, etc. +INSERT INTO + silo_quotas ( + silo_id, + time_created, + time_modified, + cpus, + memory_bytes, + storage_bytes + ) +SELECT + s.id AS silo_id, + NOW() AS time_created, + NOW() AS time_modified, + 9999999999 AS cpus, + 999999999999999999 AS memory_bytes, + 999999999999999999 AS storage_bytes +FROM + silo s + LEFT JOIN silo_quotas sq ON s.id = sq.silo_id +WHERE + sq.silo_id IS NULL; \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 0bf365a2f1..be7291b4e4 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -827,6 +827,15 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_ssh_key_by_silo_user ON omicron.public. ) WHERE time_deleted IS NULL; +CREATE TABLE IF NOT EXISTS omicron.public.silo_quotas ( + silo_id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + cpus INT8 NOT NULL, + memory_bytes INT8 NOT NULL, + storage_bytes INT8 NOT NULL +); + /* * Projects */ @@ -3062,7 +3071,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '19.0.0', NULL) + ( TRUE, NOW(), NOW(), '20.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 7c3cd6abe9d957a51465209497064fb133854932 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 13 Dec 2023 08:40:39 +0000 Subject: [PATCH 098/186] Update OPTE to 0.27.201 (#4683) Very minor changes, principally a tweak and probes to chase down oxidecomputer/opte#426. 
--- Cargo.lock | 12 ++++++------ Cargo.toml | 4 ++-- tools/opte_version | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7bf813233c..4a1ac63b83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3189,7 +3189,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" +source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" [[package]] name = "illumos-utils" @@ -3596,7 +3596,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" +source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" dependencies = [ "quote", "syn 2.0.32", @@ -5297,7 +5297,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" +source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" dependencies = [ "cfg-if", "dyn-clone", @@ -5313,7 +5313,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" +source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -5325,7 +5325,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" +source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" dependencies = [ "libc", "libnet", @@ -5399,7 +5399,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=24ceba1969269e4d81bda83d8968d7d7f713c46b#24ceba1969269e4d81bda83d8968d7d7f713c46b" +source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" dependencies = [ "illumos-sys-hdrs", "opte", diff --git a/Cargo.toml b/Cargo.toml index 2ce3bcafb4..7f85d9415b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -261,7 +261,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.9.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "24ceba1969269e4d81bda83d8968d7d7f713c46b", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "01356ee8c5d876ce6614ea550e12114c10bcfb34", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" @@ -269,7 +269,7 @@ openapiv3 = "2.0.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = 
"https://github.com/oxidecomputer/opte", rev = "24ceba1969269e4d81bda83d8968d7d7f713c46b" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "01356ee8c5d876ce6614ea550e12114c10bcfb34" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/tools/opte_version b/tools/opte_version index fa0ef8d768..77cae5bfa6 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.27.199 +0.27.201 From 870e7e59c3d8c91d4b4a0ce5a7311e84a080f8fe Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:09:01 -0800 Subject: [PATCH 099/186] Update russh monorepo to 0.40.1 (#4686) --- Cargo.lock | 8 ++++---- end-to-end-tests/Cargo.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a1ac63b83..8bfd5110f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6906,9 +6906,9 @@ dependencies = [ [[package]] name = "russh" -version = "0.40.0" +version = "0.40.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98bee7ebcce06bfc40a46b9d90205c6132d899bb9095c5ce9da3cdad8ec0833d" +checksum = "23955cec4c4186e8c36f42c5d4043f9fd6cab8702fd08ce1971d966b48ec832f" dependencies = [ "aes", "aes-gcm", @@ -6951,9 +6951,9 @@ dependencies = [ [[package]] name = "russh-keys" -version = "0.40.0" +version = "0.40.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b5d5a656fe1c3024d829d054cd8c0c78dc831e4b2d4b08360569c3b38f3017f" +checksum = "9d0de3cb3cbfa773b7f170b6830565fac207a0d630cc666a29f80097cc374dd8" dependencies = [ "aes", "async-trait", diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 8a1f91eee8..9e38112c36 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -16,8 +16,8 @@ omicron-test-utils.workspace = true oxide-client.workspace = true rand.workspace = true reqwest.workspace = true -russh = "0.40.0" -russh-keys = "0.40.0" +russh = "0.40.1" +russh-keys = "0.40.1" serde.workspace = true serde_json.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } From 6999098a9cd8904c5f9f1b2215c0055d19959cf8 Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Thu, 14 Dec 2023 13:58:40 +0000 Subject: [PATCH 100/186] Update OPTE to 0.27.202 (#4693) Fix for https://github.com/oxidecomputer/opte/issues/428 --- Cargo.lock | 12 ++++++------ Cargo.toml | 4 ++-- tools/opte_version | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8bfd5110f7..e9e5c1594d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3189,7 +3189,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" +source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" [[package]] name = "illumos-utils" @@ -3596,7 +3596,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" +source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" dependencies = [ "quote", "syn 2.0.32", @@ -5297,7 +5297,7 @@ dependencies = [ [[package]] name = "opte" version = 
"0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" +source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" dependencies = [ "cfg-if", "dyn-clone", @@ -5313,7 +5313,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" +source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -5325,7 +5325,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" +source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" dependencies = [ "libc", "libnet", @@ -5399,7 +5399,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=01356ee8c5d876ce6614ea550e12114c10bcfb34#01356ee8c5d876ce6614ea550e12114c10bcfb34" +source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" dependencies = [ "illumos-sys-hdrs", "opte", diff --git a/Cargo.toml b/Cargo.toml index 7f85d9415b..841c7bb16b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -261,7 +261,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.9.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "01356ee8c5d876ce6614ea550e12114c10bcfb34", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" @@ -269,7 +269,7 @@ openapiv3 = "2.0.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "01356ee8c5d876ce6614ea550e12114c10bcfb34" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/tools/opte_version b/tools/opte_version index 77cae5bfa6..619a109b35 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.27.201 +0.27.202 From c82d4fcbd8dc6336c8a61281485cd4ac3fb6698f Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 14 Dec 2023 13:45:07 -0500 Subject: [PATCH 101/186] [Trivial] Fix wording and comments (#4695) The comment about sled address allocation was removed because it was implemented in https://github.com/oxidecomputer/omicron/pull/4545 --- internal-dns/src/config.rs | 4 ++-- sled-agent/src/rack_setup/service.rs | 10 +++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index 86dd6e802e..92f37f6124 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -111,8 +111,8 @@ impl Host { /// /// `DnsConfigBuilder` provides a much simpler interface for constructing DNS /// zone data than using `DnsConfig` directly. That's because it makes a number -/// of assumptions that are true of the control plane DNS zone (all described in -/// RFD 248), but not true in general about DNS zones: +/// of assumptions that are true of the control plane DNS zones (all described +/// in RFD 248), but not true in general about DNS zones: /// /// - We assume that there are only two kinds of hosts: a "sled" (an illumos /// global zone) or a "zone" (an illumos non-global zone). (Both of these are diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 8038658fb1..af81df52bb 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -191,11 +191,11 @@ impl RackSetupService { /// Arguments: /// - `log`: The logger. /// - `config`: The config file, which is used to setup the rack. - /// - `storage_resources`: All the disks and zpools managed by this sled + /// - `storage_manager`: A handle for interacting with the storage manager + /// task /// - `local_bootstrap_agent`: Communication channel by which we can send - /// commands to our local bootstrap-agent (e.g., to initialize sled + /// commands to our local bootstrap-agent (e.g., to start sled-agents) /// - `bootstore` - A handle to call bootstore APIs - /// agents). pub(crate) fn new( log: Logger, config: Config, @@ -1083,10 +1083,6 @@ impl ServiceInner { ) .await?; - // TODO Questions to consider: - // - What if a sled comes online *right after* this setup? How does - // it get a /64? - Ok(()) } } From b8c8658a8c5c0d9800848bc41fcc6b54e921f79b Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 14 Dec 2023 14:39:28 -0500 Subject: [PATCH 102/186] Rename disks when un-deleting and faulting (#4681) When un-deleting phantom disks and setting them to faulted, use a new name that includes the disk's UUID: this ensures that even if a user created another disk with the same name in the project, the phantom disk can still be un-deleted and faulted, and eventually cleaned up. Fixes #4673 --- nexus/db-queries/src/db/datastore/disk.rs | 22 +++- nexus/tests/integration_tests/disks.rs | 141 +++++++++++++++++++++- 2 files changed, 158 insertions(+), 5 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 94d950f86a..2055287e62 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -657,7 +657,20 @@ impl DataStore { /// If the disk delete saga unwinds, then the disk should _not_ remain /// deleted: disk delete saga should be triggered again in order to fully /// complete, and the only way to do that is to un-delete the disk. Set it - /// to faulted to ensure that it won't be used. + /// to faulted to ensure that it won't be used. 
Use the disk's UUID as part
+    /// of its new name to ensure that even if a user created another disk that
+    /// shadows this "phantom" disk the original can still be un-deleted and
+    /// faulted.
+    ///
+    /// It's worth pointing out that it's possible that the user created a disk,
+    /// then used that disk's ID to make a new disk with the same name as this
+    /// function would have picked when undeleting the original disk. In the
+    /// event that the original disk's delete saga unwound, this would cause
+    /// that unwind to fail at this step, and would cause a stuck saga that
+    /// requires manual intervention. The fixes as part of addressing issue 3866
+    /// should greatly reduce the number of disk delete sagas that unwind, but
+    /// this possibility does exist. To any customer reading this: please don't
+    /// name your disks `deleted-{another disk's id}` :)
     pub async fn project_undelete_disk_set_faulted_no_auth(
         &self,
         disk_id: &Uuid,
@@ -667,12 +680,19 @@ impl DataStore {
 
         let faulted = api::external::DiskState::Faulted.label();
 
+        // If only the UUID is used, you will hit "name cannot be a UUID to
+        // avoid ambiguity with IDs". Add a small prefix to avoid this, and use
+        // "deleted" to be unambiguous to the user about what they should do
+        // with this disk.
+        let new_name = format!("deleted-{disk_id}");
+
         let result = diesel::update(dsl::disk)
             .filter(dsl::time_deleted.is_not_null())
             .filter(dsl::id.eq(*disk_id))
             .set((
                 dsl::time_deleted.eq(None::<DateTime<Utc>>),
                 dsl::disk_state.eq(faulted),
+                dsl::name.eq(new_name),
             ))
             .check_if_exists::<Disk>(*disk_id)
             .execute_and_check(&conn)
diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs
index 807c054b64..a7c9c99509 100644
--- a/nexus/tests/integration_tests/disks.rs
+++ b/nexus/tests/integration_tests/disks.rs
@@ -1245,7 +1245,7 @@ async fn test_disk_virtual_provisioning_collection(
 async fn test_disk_virtual_provisioning_collection_failed_delete(
     cptestctx: &ControlPlaneTestContext,
 ) {
-    // Confirm that there's a panic deleting a project if a disk deletion fails
+    // Confirm that there's no panic deleting a project if a disk deletion fails
     let client = &cptestctx.external_client;
     let nexus = &cptestctx.server.apictx().nexus;
     let datastore = nexus.datastore();
@@ -1271,6 +1271,7 @@ async fn test_disk_virtual_provisioning_collection_failed_delete(
         },
         size: disk_size,
     };
+
     NexusRequest::new(
         RequestBuilder::new(client, Method::POST, &disks_url)
             .body(Some(&disk_one))
@@ -1281,6 +1282,11 @@ async fn test_disk_virtual_provisioning_collection_failed_delete(
     .await
     .expect("unexpected failure creating 1 GiB disk");
 
+    // Get the disk
+    let disk_url = format!("/v1/disks/{}?project={}", "disk-one", PROJECT_NAME);
+    let disk = disk_get(&client, &disk_url).await;
+    assert_eq!(disk.state, DiskState::Detached);
+
     // Assert correct virtual provisioning collection numbers
     let virtual_provisioning_collection = datastore
         .virtual_provisioning_collection_get(&opctx, project_id1)
@@ -1302,8 +1308,6 @@ async fn test_disk_virtual_provisioning_collection_failed_delete(
         .await;
 
     // Delete the disk - expect this to fail
-    let disk_url = format!("/v1/disks/{}?project={}", "disk-one", PROJECT_NAME);
-
     NexusRequest::new(
         RequestBuilder::new(client, Method::DELETE, &disk_url)
             .expect_status(Some(StatusCode::INTERNAL_SERVER_ERROR)),
@@ -1323,7 +1327,12 @@ async fn test_disk_virtual_provisioning_collection_failed_delete(
         disk_size
     );
 
-    // And the disk is now faulted
+    // And the disk is now faulted.
The name will have changed due to the + // "undelete and fault" function. + let disk_url = format!( + "/v1/disks/deleted-{}?project={}", + disk.identity.id, PROJECT_NAME + ); let disk = disk_get(&client, &disk_url).await; assert_eq!(disk.state, DiskState::Faulted); @@ -1373,6 +1382,130 @@ async fn test_disk_virtual_provisioning_collection_failed_delete( .expect("unexpected failure deleting project"); } +#[nexus_test] +async fn test_phantom_disk_rename(cptestctx: &ControlPlaneTestContext) { + // Confirm that phantom disks are renamed when they are un-deleted and + // faulted + + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + + let _disk_test = DiskTest::new(&cptestctx).await; + + populate_ip_pool(&client, "default", None).await; + let _project_id1 = create_project(client, PROJECT_NAME).await.identity.id; + + // Create a 1 GB disk + let disk_size = ByteCount::from_gibibytes_u32(1); + let disks_url = get_disks_url(); + let disk_one = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "disk-one".parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&disk_one)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure creating 1 GiB disk"); + + let disk_url = format!("/v1/disks/{}?project={}", "disk-one", PROJECT_NAME); + + // Confirm it's there + let disk = disk_get(&client, &disk_url).await; + assert_eq!(disk.state, DiskState::Detached); + + let original_disk_id = disk.identity.id; + + // Now, request disk delete + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, &disk_url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure deleting 1 GiB disk"); + + // It's gone! 
+ NexusRequest::new( + RequestBuilder::new(client, Method::GET, &disk_url) + .expect_status(Some(StatusCode::NOT_FOUND)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected success finding 1 GiB disk"); + + // Create a new disk with the same name + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&disk_one)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure creating 1 GiB disk"); + + // Confirm it's there + let disk = disk_get(&client, &disk_url).await; + assert_eq!(disk.state, DiskState::Detached); + + // Confirm it's not the same disk + let new_disk_id = disk.identity.id; + assert_ne!(original_disk_id, new_disk_id); + + // Un-delete the original and set it to faulted + datastore + .project_undelete_disk_set_faulted_no_auth(&original_disk_id) + .await + .unwrap(); + + // The original disk is now faulted + let disk_url = format!( + "/v1/disks/deleted-{}?project={}", + original_disk_id, PROJECT_NAME + ); + let disk = disk_get(&client, &disk_url).await; + assert_eq!(disk.state, DiskState::Faulted); + + // Make sure original can still be deleted + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, &disk_url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure deleting 1 GiB disk"); + + // Make sure new can be deleted too + let disk_url = format!("/v1/disks/{}?project={}", "disk-one", PROJECT_NAME); + let disk = disk_get(&client, &disk_url).await; + assert_eq!(disk.state, DiskState::Detached); + + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, &disk_url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("unexpected failure deleting 1 GiB disk"); +} + // Test disk size accounting #[nexus_test] async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { From d967e52bb59ae7741586ce47d0fb46c374b7d537 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 14 Dec 2023 18:49:16 -0600 Subject: [PATCH 103/186] Bump web console (#4701) https://github.com/oxidecomputer/console/compare/1802c285...007cfe67 * [007cfe67](https://github.com/oxidecomputer/console/commit/007cfe67) oxidecomputer/console#1858 * [d536bd97](https://github.com/oxidecomputer/console/commit/d536bd97) oxidecomputer/console#1857 * [bfd59c0d](https://github.com/oxidecomputer/console/commit/bfd59c0d) oxidecomputer/console#1845 * [ba335d45](https://github.com/oxidecomputer/console/commit/ba335d45) oxidecomputer/console#1853 * [5556d881](https://github.com/oxidecomputer/console/commit/5556d881) update mockServiceWorker.js * [48da3e1c](https://github.com/oxidecomputer/console/commit/48da3e1c) oxidecomputer/console#1841 * [a7532d9a](https://github.com/oxidecomputer/console/commit/a7532d9a) oxidecomputer/console#1842 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 725bda0ee9..e62ecf2bf6 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="1802c2859f40712017ab89e72740e39bfd59320b" -SHA2="34768a895f187a6ed263c0050c42084f3907c331b547362871c2ce330e9d08d1" +COMMIT="007cfe672aa7e7c791591be089bf2a2386d2c34f" +SHA2="a4f4264229724304ee383ba55d426acd1e5d713417cf1e77fed791c3a7162abf" From 582718839abbe61fb66968e90f93c426d2857e8c Mon Sep 17 00:00:00 2001 
From: Justin Bennett
Date: Thu, 14 Dec 2023 20:20:52 -0500
Subject: [PATCH 104/186] Capacity and utilization (#4696)

This PR is a follow-up to #4605 which adds views into capacity and
utilization both at the silo and system level.

API:

|op|method|url|
|--|--|--|
|silo_utilization_list | GET | /v1/system/utilization/silos |
|silo_utilization_view | GET | /v1/system/utilization/silos/{silo} |
|utilization_view | GET | /v1/utilization |

I'm not entirely satisfied w/ the silo utilization endpoints. They could be
this instead:

|op|method|url|
|--|--|--|
|silo_utilization_list | GET | /v1/system/silos-utilization |
|silo_utilization_view | GET | /v1/system/silos/{silo}/utilization |

Also take special note of the views:

```rust
// For the eyes of end users
/// View of the current silo's resource utilization and capacity
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct Utilization {
    /// Accounts for resources allocated to running instances or storage allocated via disks or snapshots.
    /// Note that CPU and memory resources associated with stopped instances are not counted here,
    /// whereas associated disks will still be counted
    pub provisioned: VirtualResourceCounts,
    /// The total amount of resources that can be provisioned in this silo.
    /// Actions that would exceed this limit will fail
    pub capacity: VirtualResourceCounts,
}

// For the eyes of an operator
/// View of a silo's resource utilization and capacity
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct SiloUtilization {
    pub silo_id: Uuid,
    pub silo_name: Name,
    /// Accounts for resources allocated in silos, like CPU or memory for running instances and storage for disks and snapshots.
    /// Note that CPU and memory resources associated with stopped instances are not counted here
    pub provisioned: VirtualResourceCounts,
    /// Accounts for the total amount of resources reserved for silos via their quotas
    pub allocated: VirtualResourceCounts,
}
```

For users in the silo I use `provisioned` and `capacity` as the language.
Their `capacity` is represented by the quota set by an operator. For the
operator, `provisioned` is the same, but `allocated` is used to denote the
amount of resources allotted via quotas.

---

Note: I had planned to add a full system utilization endpoint to this PR but
that would increase the scope. Instead we will ship that API as part of the
next release. We can calculate some version of the full system utilization on
the client by listing all the silos and their utilization.
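As a concrete illustration of that client-side roll-up, here is a minimal
sketch. The mirror structs below stand in for the generated client types, and
the `fleet_utilization` helper is hypothetical, not part of this PR:

```rust
// Hypothetical client-side roll-up: approximate fleet-wide utilization by
// summing every silo's utilization as returned by
// `GET /v1/system/utilization/silos`. The struct shapes mirror the views
// added in this PR; plain `u64` byte totals stand in for `ByteCount`.

struct ResourceCounts {
    cpus: i64,
    memory_bytes: u64,
    storage_bytes: u64,
}

struct SiloUtilization {
    provisioned: ResourceCounts,
    allocated: ResourceCounts,
}

/// Sum provisioned and allocated counts across all silos, returning the
/// fleet-wide (provisioned, allocated) pair.
fn fleet_utilization(
    silos: &[SiloUtilization],
) -> (ResourceCounts, ResourceCounts) {
    let mut provisioned =
        ResourceCounts { cpus: 0, memory_bytes: 0, storage_bytes: 0 };
    let mut allocated =
        ResourceCounts { cpus: 0, memory_bytes: 0, storage_bytes: 0 };
    for silo in silos {
        provisioned.cpus += silo.provisioned.cpus;
        provisioned.memory_bytes += silo.provisioned.memory_bytes;
        provisioned.storage_bytes += silo.provisioned.storage_bytes;
        allocated.cpus += silo.allocated.cpus;
        allocated.memory_bytes += silo.allocated.memory_bytes;
        allocated.storage_bytes += silo.allocated.storage_bytes;
    }
    (provisioned, allocated)
}
```

Keep in mind that the `allocated` totals from such a roll-up are skewed upward
by the arbitrarily high default quotas that pre-existing silos receive in the
migration, so they give a rough ceiling rather than a precise capacity figure.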
--------- Co-authored-by: Sean Klein --- common/src/api/external/http_pagination.rs | 11 +- common/src/api/external/mod.rs | 17 ++ nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/quota.rs | 21 +- nexus/db-model/src/schema.rs | 15 +- nexus/db-model/src/utilization.rs | 56 ++++ nexus/db-queries/src/db/datastore/mod.rs | 1 + .../src/db/datastore/utilization.rs | 57 ++++ nexus/src/app/mod.rs | 1 + nexus/src/app/utilization.rs | 33 +++ nexus/src/external_api/http_entrypoints.rs | 100 ++++++- nexus/tests/integration_tests/endpoints.rs | 30 +++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/quotas.rs | 6 +- nexus/tests/integration_tests/utilization.rs | 139 ++++++++++ nexus/tests/output/nexus_tags.txt | 3 + nexus/types/src/external_api/params.rs | 8 + nexus/types/src/external_api/views.rs | 58 +++- openapi/nexus.json | 254 +++++++++++++++++- schema/crdb/21.0.0/up01.sql | 17 ++ schema/crdb/dbinit.sql | 27 +- test-utils/src/dev/test_cmds.rs | 2 +- 22 files changed, 818 insertions(+), 41 deletions(-) create mode 100644 nexus/db-model/src/utilization.rs create mode 100644 nexus/db-queries/src/db/datastore/utilization.rs create mode 100644 nexus/src/app/utilization.rs create mode 100644 nexus/tests/integration_tests/utilization.rs create mode 100644 schema/crdb/21.0.0/up01.sql diff --git a/common/src/api/external/http_pagination.rs b/common/src/api/external/http_pagination.rs index 2bc78a54d6..65237f73c6 100644 --- a/common/src/api/external/http_pagination.rs +++ b/common/src/api/external/http_pagination.rs @@ -58,6 +58,8 @@ use std::fmt::Debug; use std::num::NonZeroU32; use uuid::Uuid; +use super::SimpleIdentity; + // General pagination infrastructure /// Specifies which page of results we're on @@ -147,15 +149,14 @@ pub fn marker_for_id(_: &S, t: &T) -> Uuid { /// /// This is intended for use with [`ScanByNameOrId::results_page`] with objects /// that impl [`ObjectIdentity`]. -pub fn marker_for_name_or_id( +pub fn marker_for_name_or_id( scan: &ScanByNameOrId, item: &T, ) -> NameOrId { - let identity = item.identity(); match scan.sort_by { - NameOrIdSortMode::NameAscending => identity.name.clone().into(), - NameOrIdSortMode::NameDescending => identity.name.clone().into(), - NameOrIdSortMode::IdAscending => identity.id.into(), + NameOrIdSortMode::NameAscending => item.name().clone().into(), + NameOrIdSortMode::NameDescending => item.name().clone().into(), + NameOrIdSortMode::IdAscending => item.id().into(), } } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 64a2e462ec..aa783ac9ca 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -71,6 +71,23 @@ pub trait ObjectIdentity { fn identity(&self) -> &IdentityMetadata; } +/// Exists for types that don't properly implement `ObjectIdentity` but +/// still need to be paginated by name or id. 
+pub trait SimpleIdentity {
+    fn id(&self) -> Uuid;
+    fn name(&self) -> &Name;
+}
+
+impl<T: ObjectIdentity> SimpleIdentity for T {
+    fn id(&self) -> Uuid {
+        self.identity().id
+    }
+
+    fn name(&self) -> &Name {
+        &self.identity().name
+    }
+}
+
 /// Parameters used to request a specific page of results when listing a
 /// collection of objects
 ///
diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index 908f6f2368..2c3433b2d3 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -81,6 +81,7 @@ mod switch;
 mod unsigned;
 mod update_artifact;
 mod user_builtin;
+mod utilization;
 mod virtual_provisioning_collection;
 mod virtual_provisioning_resource;
 mod vmm;
@@ -167,6 +168,7 @@ pub use switch_port::*;
 pub use system_update::*;
 pub use update_artifact::*;
 pub use user_builtin::*;
+pub use utilization::*;
 pub use virtual_provisioning_collection::*;
 pub use virtual_provisioning_resource::*;
 pub use vmm::*;
diff --git a/nexus/db-model/src/quota.rs b/nexus/db-model/src/quota.rs
index 70a8ffa1fd..ae88e12e66 100644
--- a/nexus/db-model/src/quota.rs
+++ b/nexus/db-model/src/quota.rs
@@ -65,22 +65,11 @@ impl From<SiloQuotas> for views::SiloQuotas {
     fn from(silo_quotas: SiloQuotas) -> Self {
         Self {
             silo_id: silo_quotas.silo_id,
-            cpus: silo_quotas.cpus,
-            memory: silo_quotas.memory.into(),
-            storage: silo_quotas.storage.into(),
-        }
-    }
-}
-
-impl From<views::SiloQuotas> for SiloQuotas {
-    fn from(silo_quotas: views::SiloQuotas) -> Self {
-        Self {
-            silo_id: silo_quotas.silo_id,
-            time_created: Utc::now(),
-            time_modified: Utc::now(),
-            cpus: silo_quotas.cpus,
-            memory: silo_quotas.memory.into(),
-            storage: silo_quotas.storage.into(),
+            limits: views::VirtualResourceCounts {
+                cpus: silo_quotas.cpus,
+                memory: silo_quotas.memory.into(),
+                storage: silo_quotas.storage.into(),
+            },
         }
     }
 }
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 10fa8dcfac..6839af8a76 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -420,6 +420,19 @@ table! {
     }
 }
 
+table! {
+    silo_utilization(silo_id) {
+        silo_id -> Uuid,
+        silo_name -> Text,
+        cpus_provisioned -> Int8,
+        memory_provisioned -> Int8,
+        storage_provisioned -> Int8,
+        cpus_allocated -> Int8,
+        memory_allocated -> Int8,
+        storage_allocated -> Int8,
+    }
+}
+
 table! {
     network_interface (id) {
         id -> Uuid,
@@ -1333,7 +1346,7 @@ table! {
 ///
 /// This should be updated whenever the schema is changed. For more details,
 /// refer to: schema/crdb/README.adoc
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(20, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(21, 0, 0);
 
 allow_tables_to_appear_in_same_query!(
     system_update,
diff --git a/nexus/db-model/src/utilization.rs b/nexus/db-model/src/utilization.rs
new file mode 100644
index 0000000000..9bef4f59c7
--- /dev/null
+++ b/nexus/db-model/src/utilization.rs
@@ -0,0 +1,56 @@
+use crate::ByteCount;
+use crate::{schema::silo_utilization, Name};
+use nexus_types::external_api::views;
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+#[derive(Queryable, Debug, Clone, Selectable, Serialize, Deserialize)]
+#[diesel(table_name = silo_utilization)]
+pub struct SiloUtilization {
+    pub silo_id: Uuid,
+    pub silo_name: Name,
+
+    pub cpus_allocated: i64,
+    pub memory_allocated: ByteCount,
+    pub storage_allocated: ByteCount,
+
+    pub cpus_provisioned: i64,
+    pub memory_provisioned: ByteCount,
+    pub storage_provisioned: ByteCount,
+}
+
+impl From<SiloUtilization> for views::SiloUtilization {
+    fn from(silo_utilization: SiloUtilization) -> Self {
+        Self {
+            silo_id: silo_utilization.silo_id,
+            silo_name: silo_utilization.silo_name.into(),
+            provisioned: views::VirtualResourceCounts {
+                cpus: silo_utilization.cpus_provisioned,
+                memory: silo_utilization.memory_provisioned.into(),
+                storage: silo_utilization.storage_provisioned.into(),
+            },
+            allocated: views::VirtualResourceCounts {
+                cpus: silo_utilization.cpus_allocated,
+                memory: silo_utilization.memory_allocated.into(),
+                storage: silo_utilization.storage_allocated.into(),
+            },
+        }
+    }
+}
+
+impl From<SiloUtilization> for views::Utilization {
+    fn from(silo_utilization: SiloUtilization) -> Self {
+        Self {
+            provisioned: views::VirtualResourceCounts {
+                cpus: silo_utilization.cpus_provisioned,
+                memory: silo_utilization.memory_provisioned.into(),
+                storage: silo_utilization.storage_provisioned.into(),
+            },
+            capacity: views::VirtualResourceCounts {
+                cpus: silo_utilization.cpus_allocated,
+                memory: silo_utilization.memory_allocated.into(),
+                storage: silo_utilization.storage_allocated.into(),
+            },
+        }
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 1609fc7101..93486771b5 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -86,6 +86,7 @@ mod switch;
 mod switch_interface;
 mod switch_port;
 mod update;
+mod utilization;
 mod virtual_provisioning_collection;
 mod vmm;
 mod volume;
diff --git a/nexus/db-queries/src/db/datastore/utilization.rs b/nexus/db-queries/src/db/datastore/utilization.rs
new file mode 100644
index 0000000000..4fbe215fe2
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/utilization.rs
@@ -0,0 +1,57 @@
+use super::DataStore;
+use crate::authz;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+use crate::db::model::Name;
+use crate::db::model::SiloUtilization;
+use crate::db::pagination::paginated;
+use async_bb8_diesel::AsyncRunQueryDsl;
+use diesel::{ExpressionMethods, QueryDsl, SelectableHelper};
+use omicron_common::api::external::http_pagination::PaginatedBy;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
+use ref_cast::RefCast;
+
+impl DataStore {
+    pub async fn silo_utilization_view(
+        &self,
+        opctx: &OpContext,
+        authz_silo: &authz::Silo,
+    ) -> Result<SiloUtilization, Error> {
+        opctx.authorize(authz::Action::Read, authz_silo).await?;
+        let silo_id = authz_silo.id();
+
+        use db::schema::silo_utilization::dsl;
+        dsl::silo_utilization
+            .filter(dsl::silo_id.eq(silo_id))
+            .select(SiloUtilization::as_select())
+            .first_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    pub async fn silo_utilization_list(
+        &self,
+        opctx: &OpContext,
+        pagparams: &PaginatedBy<'_>,
+    ) -> ListResultVec<SiloUtilization> {
+        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+        use db::schema::silo_utilization::dsl;
+        match pagparams {
+            PaginatedBy::Id(pagparams) => {
+                paginated(dsl::silo_utilization, dsl::silo_id, pagparams)
+            }
+            PaginatedBy::Name(pagparams) => paginated(
+                dsl::silo_utilization,
+                dsl::silo_name,
+                &pagparams.map_name(|n| Name::ref_cast(n)),
+            ),
+        }
+        .select(SiloUtilization::as_select())
+        .load_async(&*self.pool_connection_authorized(opctx).await?)
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+}
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index b92714a365..5af45985db 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -64,6 +64,7 @@ mod switch_interface;
 mod switch_port;
 pub mod test_interfaces;
 mod update;
+mod utilization;
 mod volume;
 mod vpc;
 mod vpc_router;
diff --git a/nexus/src/app/utilization.rs b/nexus/src/app/utilization.rs
new file mode 100644
index 0000000000..526ebc9470
--- /dev/null
+++ b/nexus/src/app/utilization.rs
@@ -0,0 +1,33 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Insights into capacity and utilization
+
+use nexus_db_queries::authz;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db;
+use nexus_db_queries::db::lookup;
+use omicron_common::api::external::http_pagination::PaginatedBy;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
+
+impl super::Nexus {
+    pub async fn silo_utilization_view(
+        &self,
+        opctx: &OpContext,
+        silo_lookup: &lookup::Silo<'_>,
+    ) -> Result<db::model::SiloUtilization, Error> {
+        let (.., authz_silo) =
+            silo_lookup.lookup_for(authz::Action::Read).await?;
+        self.db_datastore.silo_utilization_view(opctx, &authz_silo).await
+    }
+
+    pub async fn silo_utilization_list(
+        &self,
+        opctx: &OpContext,
+        pagparams: &PaginatedBy<'_>,
+    ) -> ListResultVec<db::model::SiloUtilization> {
+        self.db_datastore.silo_utilization_list(opctx, pagparams).await
+    }
+}
diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs
index 6720f95c39..042ee294b7 100644
--- a/nexus/src/external_api/http_entrypoints.rs
+++ b/nexus/src/external_api/http_entrypoints.rs
@@ -6,11 +6,13 @@ use super::{
     console_api, device_auth, params,
+    params::ProjectSelector,
     shared::UninitializedSled,
     views::{
         self, Certificate, Group, IdentityProvider, Image, IpPool, IpPoolRange,
-        PhysicalDisk, Project, Rack, Role, Silo, Sled, Snapshot, SshKey, User,
-        UserBuiltin, Vpc, VpcRouter, VpcSubnet,
+        PhysicalDisk, Project, Rack, Role, Silo, SiloQuotas, SiloUtilization,
+        Sled, SledInstance, Snapshot, SshKey, Switch, User, UserBuiltin, Vpc,
+        VpcRouter, VpcSubnet,
     },
 };
 use crate::external_api::shared;
@@ -38,6 +40,7 @@ use dropshot::{
 use ipnetwork::IpNetwork;
 use nexus_db_queries::authz;
 use nexus_db_queries::db;
+use nexus_db_queries::db::identity::AssetIdentityMetadata;
 use nexus_db_queries::db::identity::Resource;
 use nexus_db_queries::db::lookup::ImageLookup;
 use nexus_db_queries::db::lookup::ImageParentLookup;
@@ -45,11 +48,7 @@ use nexus_db_queries::db::model::Name;
 use nexus_db_queries::{
     authz::ApiResource, db::fixed_data::silo::INTERNAL_SILO_ID,
 };
-use nexus_types::external_api::{params::ProjectSelector, views::SiloQuotas};
-use nexus_types::{
-    external_api::views::{SledInstance, Switch},
-    identity::AssetIdentityMetadata,
-};
+use nexus_types::external_api::views::Utilization;
 use omicron_common::api::external::http_pagination::data_page_params_for;
 use omicron_common::api::external::http_pagination::marker_for_name;
 use omicron_common::api::external::http_pagination::marker_for_name_or_id;
@@ -272,6 +271,8 @@ pub(crate) fn external_api() -> NexusApiDescription {
     api.register(networking_bgp_announce_set_list)?;
     api.register(networking_bgp_announce_set_delete)?;
 
+    api.register(utilization_view)?;
+
     // Fleet-wide API operations
     api.register(silo_list)?;
     api.register(silo_create)?;
@@ -280,8 +281,10 @@ pub(crate) fn external_api() -> NexusApiDescription {
     api.register(silo_policy_view)?;
     api.register(silo_policy_update)?;
 
-    api.register(system_quotas_list)?;
+    api.register(silo_utilization_view)?;
+    api.register(silo_utilization_list)?;
 
+    api.register(system_quotas_list)?;
     api.register(silo_quotas_view)?;
     api.register(silo_quotas_update)?;
@@ -515,6 +518,87 @@ async fn policy_update(
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
 
+/// View the resource utilization of the user's current silo
+#[endpoint {
+    method = GET,
+    path = "/v1/utilization",
+    tags = ["silos"],
+}]
+async fn utilization_view(
+    rqctx: RequestContext<Arc<ServerContext>>,
+) -> Result<HttpResponseOk<Utilization>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let nexus = &apictx.nexus;
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let silo_lookup = nexus.current_silo_lookup(&opctx)?;
+        let utilization =
+            nexus.silo_utilization_view(&opctx, &silo_lookup).await?;
+
+        Ok(HttpResponseOk(utilization.into()))
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
+/// View the current utilization of a given silo
+#[endpoint {
+    method = GET,
+    path = "/v1/system/utilization/silos/{silo}",
+    tags = ["system/silos"],
+}]
+async fn silo_utilization_view(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<params::SiloPath>,
+) -> Result<HttpResponseOk<SiloUtilization>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let nexus = &apictx.nexus;
+
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let silo_lookup =
+            nexus.silo_lookup(&opctx, path_params.into_inner().silo)?;
+        let quotas = nexus.silo_utilization_view(&opctx, &silo_lookup).await?;
+
+        Ok(HttpResponseOk(quotas.into()))
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+/// List current utilization state for all silos
+#[endpoint {
+    method = GET,
+    path = "/v1/system/utilization/silos",
+    tags = ["system/silos"],
+}]
+async fn silo_utilization_list(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    query_params: Query<PaginatedByNameOrId>,
+) -> Result<HttpResponseOk<ResultsPage<SiloUtilization>>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let nexus = &apictx.nexus;
+
+        let query = query_params.into_inner();
+        let pagparams = data_page_params_for(&rqctx, &query)?;
+        let scan_params = ScanByNameOrId::from_query(&query)?;
+        let paginated_by = name_or_id_pagination(&pagparams, scan_params)?;
+
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let utilization = nexus
+            .silo_utilization_list(&opctx, &paginated_by)
+            .await?
+ .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + utilization, + &marker_for_name_or_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// Lists resource quotas for all silos #[endpoint { method = GET, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index bd6df210c0..c41fcdbed9 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -100,6 +100,9 @@ lazy_static! { tls_certificates: vec![], mapped_fleet_roles: Default::default(), }; + + pub static ref DEMO_SILO_UTIL_URL: String = format!("/v1/system/utilization/silos/{}", *DEMO_SILO_NAME); + // Use the default Silo for testing the local IdP pub static ref DEMO_SILO_USERS_CREATE_URL: String = format!( "/v1/system/identity-providers/local/users?silo={}", @@ -121,6 +124,9 @@ lazy_static! { "/v1/system/identity-providers/local/users/{{id}}/set-password?silo={}", DEFAULT_SILO.identity().name, ); +} + +lazy_static! { // Project used for testing pub static ref DEMO_PROJECT_NAME: Name = "demo-project".parse().unwrap(); @@ -974,6 +980,30 @@ lazy_static! { AllowedMethod::Get ], }, + VerifyEndpoint { + url: "/v1/system/utilization/silos", + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get + ] + }, + VerifyEndpoint { + url: &DEMO_SILO_UTIL_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get + ] + }, + VerifyEndpoint { + url: "/v1/utilization", + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::ReadOnly, + allowed_methods: vec![ + AllowedMethod::Get + ] + }, VerifyEndpoint { url: "/v1/policy", visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 35c70bf874..6cb99b9e45 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -45,6 +45,7 @@ mod unauthorized; mod unauthorized_coverage; mod updates; mod users_builtin; +mod utilization; mod volume_management; mod vpc_firewall; mod vpc_routers; diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index 2fddf4e05c..0ad2419bee 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -269,9 +269,9 @@ async fn test_quotas(cptestctx: &ControlPlaneTestContext) { .expect("failed to set quotas"); let quotas = system.get_quotas(client).await; - assert_eq!(quotas.cpus, 4); - assert_eq!(quotas.memory, ByteCount::from_gibibytes_u32(15)); - assert_eq!(quotas.storage, ByteCount::from_gibibytes_u32(2)); + assert_eq!(quotas.limits.cpus, 4); + assert_eq!(quotas.limits.memory, ByteCount::from_gibibytes_u32(15)); + assert_eq!(quotas.limits.storage, ByteCount::from_gibibytes_u32(2)); // Ensure memory quota is enforced let err = system diff --git a/nexus/tests/integration_tests/utilization.rs b/nexus/tests/integration_tests/utilization.rs new file mode 100644 index 0000000000..5ebf56f35a --- /dev/null +++ b/nexus/tests/integration_tests/utilization.rs @@ -0,0 +1,139 @@ +use http::Method; +use http::StatusCode; +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_instance; +use 
nexus_test_utils::resource_helpers::create_project;
+use nexus_test_utils::resource_helpers::objects_list_page_authz;
+use nexus_test_utils::resource_helpers::populate_ip_pool;
+use nexus_test_utils::resource_helpers::DiskTest;
+use nexus_test_utils_macros::nexus_test;
+use nexus_types::external_api::params;
+use nexus_types::external_api::params::SiloQuotasCreate;
+use nexus_types::external_api::views::SiloUtilization;
+use nexus_types::external_api::views::Utilization;
+use nexus_types::external_api::views::VirtualResourceCounts;
+use omicron_common::api::external::ByteCount;
+use omicron_common::api::external::IdentityMetadataCreateParams;
+
+static PROJECT_NAME: &str = "utilization-test-project";
+static INSTANCE_NAME: &str = "utilization-test-instance";
+
+type ControlPlaneTestContext =
+    nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
+
+#[nexus_test]
+async fn test_utilization(cptestctx: &ControlPlaneTestContext) {
+    let client = &cptestctx.external_client;
+
+    populate_ip_pool(&client, "default", None).await;
+
+    let current_util = objects_list_page_authz::<SiloUtilization>(
+        client,
+        "/v1/system/utilization/silos",
+    )
+    .await
+    .items;
+
+    assert_eq!(current_util.len(), 2);
+
+    assert_eq!(current_util[0].silo_name, "default-silo");
+    assert_eq!(current_util[0].provisioned, SiloQuotasCreate::empty().into());
+    assert_eq!(
+        current_util[0].allocated,
+        SiloQuotasCreate::arbitrarily_high_default().into()
+    );
+
+    assert_eq!(current_util[1].silo_name, "test-suite-silo");
+    assert_eq!(current_util[1].provisioned, SiloQuotasCreate::empty().into());
+    assert_eq!(current_util[1].allocated, SiloQuotasCreate::empty().into());
+
+    let _ = create_project(&client, &PROJECT_NAME).await;
+    let _ = create_instance(client, &PROJECT_NAME, &INSTANCE_NAME).await;
+
+    // Start instance
+    NexusRequest::new(
+        RequestBuilder::new(
+            client,
+            Method::POST,
+            format!(
+                "/v1/instances/{}/start?project={}",
+                &INSTANCE_NAME, &PROJECT_NAME
+            )
+            .as_str(),
+        )
+        .body(None as Option<&serde_json::Value>)
+        .expect_status(Some(StatusCode::ACCEPTED)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("failed to start instance");
+
+    // get utilization for just the default silo
+    let silo_util = NexusRequest::object_get(
+        client,
+        "/v1/system/utilization/silos/default-silo",
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("failed to fetch silo utilization")
+    .parsed_body::<SiloUtilization>()
+    .unwrap();
+
+    assert_eq!(
+        silo_util.provisioned,
+        VirtualResourceCounts {
+            cpus: 4,
+            memory: ByteCount::from_gibibytes_u32(1),
+            storage: ByteCount::from(0)
+        }
+    );
+
+    // Simulate space for disks
+    DiskTest::new(&cptestctx).await;
+
+    // provision disk
+    NexusRequest::new(
+        RequestBuilder::new(
+            client,
+            Method::POST,
+            format!("/v1/disks?project={}", &PROJECT_NAME).as_str(),
+        )
+        .body(Some(&params::DiskCreate {
+            identity: IdentityMetadataCreateParams {
+                name: "test-disk".parse().unwrap(),
+                description: "".into(),
+            },
+            size: ByteCount::from_gibibytes_u32(2),
+            disk_source: params::DiskSource::Blank {
+                block_size: params::BlockSize::try_from(512).unwrap(),
+            },
+        }))
+        .expect_status(Some(StatusCode::CREATED)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("disk failed to create");
+
+    // Fetch the utilization again, this time through the current silo's
+    // end-user endpoint
+    let silo_util = NexusRequest::object_get(client, "/v1/utilization")
+        .authn_as(AuthnMode::PrivilegedUser)
+        .execute()
+        .await
+        .expect("failed to fetch utilization for current (default) silo")
+        .parsed_body::<Utilization>()
+        .unwrap();
+
+    assert_eq!(
+        silo_util.provisioned,
+        VirtualResourceCounts {
+            cpus: 4,
+            memory: ByteCount::from_gibibytes_u32(1),
+            storage: ByteCount::from_gibibytes_u32(2)
+        }
+    );
+}
diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt
index 3f77f4cb26..7e1dc306d5 100644
--- a/nexus/tests/output/nexus_tags.txt
+++ b/nexus/tests/output/nexus_tags.txt
@@ -106,6 +106,7 @@ group_view                              GET      /v1/groups/{group_id}
 policy_update                           PUT      /v1/policy
 policy_view                             GET      /v1/policy
 user_list                               GET      /v1/users
+utilization_view                        GET      /v1/utilization
 
 API operations found with tag "snapshots"
 OPERATION ID                            METHOD   URL PATH
@@ -187,6 +188,8 @@ silo_quotas_update                      PUT      /v1/system/silos/{silo}/quotas
 silo_quotas_view                        GET      /v1/system/silos/{silo}/quotas
 silo_user_list                          GET      /v1/system/users
 silo_user_view                          GET      /v1/system/users/{user_id}
+silo_utilization_list                   GET      /v1/system/utilization/silos
+silo_utilization_view                   GET      /v1/system/utilization/silos/{silo}
 silo_view                               GET      /v1/system/silos/{silo}
 system_quotas_list                      GET      /v1/system/silo-quotas
 user_builtin_list                       GET      /v1/system/users-builtin
 user_builtin_view                       GET      /v1/system/users-builtin/{user}
diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs
index f27a6619e2..df399e310c 100644
--- a/nexus/types/src/external_api/params.rs
+++ b/nexus/types/src/external_api/params.rs
@@ -339,6 +339,14 @@ impl SiloQuotasCreate {
     }
 }
 
+// This conversion is mostly just useful for tests such that we can reuse
+// empty() and arbitrarily_high_default() when testing utilization
+impl From<SiloQuotasCreate> for super::views::VirtualResourceCounts {
+    fn from(quota: SiloQuotasCreate) -> Self {
+        Self { cpus: quota.cpus, memory: quota.memory, storage: quota.storage }
+    }
+}
+
 /// Updateable properties of a Silo's resource limits.
 /// If a value is omitted it will not be updated.
 #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs
index ecd459594a..46a8aa3d95 100644
--- a/nexus/types/src/external_api/views.rs
+++ b/nexus/types/src/external_api/views.rs
@@ -13,7 +13,7 @@ use chrono::DateTime;
 use chrono::Utc;
 use omicron_common::api::external::{
     ByteCount, Digest, IdentityMetadata, InstanceState, Ipv4Net, Ipv6Net, Name,
-    ObjectIdentity, RoleName, SemverVersion,
+    ObjectIdentity, RoleName, SemverVersion, SimpleIdentity,
 };
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -49,14 +49,64 @@ pub struct Silo {
         BTreeMap>,
 }
 
-#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
-pub struct SiloQuotas {
-    pub silo_id: Uuid,
+/// A collection of resource counts used to describe capacity and utilization
+#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
+pub struct VirtualResourceCounts {
+    /// Number of virtual CPUs
     pub cpus: i64,
+    /// Amount of memory in bytes
     pub memory: ByteCount,
+    /// Amount of disk storage in bytes
     pub storage: ByteCount,
 }
 
+/// A collection of resource counts used to set the virtual capacity of a silo
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct SiloQuotas {
+    pub silo_id: Uuid,
+    #[serde(flatten)]
+    pub limits: VirtualResourceCounts,
+}
+
+// For the eyes of end users
+/// View of the current silo's resource utilization and capacity
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct Utilization {
+    /// Accounts for resources allocated to running instances or storage allocated via disks or snapshots.
+    /// Note that CPU and memory resources associated with stopped instances are not counted here,
+    /// whereas associated disks will still be counted
+    pub provisioned: VirtualResourceCounts,
+    /// The total amount of resources that can be provisioned in this silo.
+    /// Actions that would exceed this limit will fail
+    pub capacity: VirtualResourceCounts,
+}
+
+// For the eyes of an operator
+/// View of a silo's resource utilization and capacity
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct SiloUtilization {
+    pub silo_id: Uuid,
+    pub silo_name: Name,
+    /// Accounts for resources allocated in silos, like CPU or memory for running instances and storage for disks and snapshots.
+    /// Note that CPU and memory resources associated with stopped instances are not counted here
+    pub provisioned: VirtualResourceCounts,
+    /// Accounts for the total amount of resources reserved for silos via their quotas
+    pub allocated: VirtualResourceCounts,
+}
+
+// We want to be able to paginate SiloUtilization by NameOrId
+// but we can't derive ObjectIdentity because this isn't a typical asset.
+// Instead we implement this new simple identity trait which is used under the
+// hood by the pagination code.
+impl SimpleIdentity for SiloUtilization {
+    fn id(&self) -> Uuid {
+        self.silo_id
+    }
+    fn name(&self) -> &Name {
+        &self.silo_name
+    }
+}
+
 // IDENTITY PROVIDER

 #[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
diff --git a/openapi/nexus.json b/openapi/nexus.json
index 2ddd5f0e94..2a18934718 100644
--- a/openapi/nexus.json
+++ b/openapi/nexus.json
@@ -6815,6 +6815,103 @@
       }
     }
   },
+    "/v1/system/utilization/silos": {
+      "get": {
+        "tags": [
+          "system/silos"
+        ],
+        "summary": "List current utilization state for all silos",
+        "operationId": "silo_utilization_list",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "limit",
+            "description": "Maximum number of items returned by a single call",
+            "schema": {
+              "nullable": true,
+              "type": "integer",
+              "format": "uint32",
+              "minimum": 1
+            }
+          },
+          {
+            "in": "query",
+            "name": "page_token",
+            "description": "Token returned by previous call to retrieve the subsequent page",
+            "schema": {
+              "nullable": true,
+              "type": "string"
+            }
+          },
+          {
+            "in": "query",
+            "name": "sort_by",
+            "schema": {
+              "$ref": "#/components/schemas/NameOrIdSortMode"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "successful operation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/SiloUtilizationResultsPage"
+                }
+              }
+            }
+          },
+          "4XX": {
+            "$ref": "#/components/responses/Error"
+          },
+          "5XX": {
+            "$ref": "#/components/responses/Error"
+          }
+        },
+        "x-dropshot-pagination": {
+          "required": []
+        }
+      }
+    },
+    "/v1/system/utilization/silos/{silo}": {
+      "get": {
+        "tags": [
+          "system/silos"
+        ],
+        "summary": "View the current utilization of a given silo",
+        "operationId": "silo_utilization_view",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "silo",
+            "description": "Name or ID of the silo",
+            "required": true,
+            "schema": {
+              "$ref": "#/components/schemas/NameOrId"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "successful operation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/SiloUtilization"
+                }
+              }
+            }
+          },
+          "4XX": {
+            "$ref": "#/components/responses/Error"
+          },
+          "5XX": {
+            "$ref": "#/components/responses/Error"
+          }
+        }
+      }
+    },
     "/v1/users": {
       "get": {
         "tags": [
@@ -6883,6 +6980,33 @@
       }
     }
   },
+    "/v1/utilization": {
+      "get": {
+        "tags": [
+          "silos"
+        ],
+        "summary": "View the resource utilization of the user's current silo",
+        "operationId": "utilization_view",
+        "responses": {
+          "200": {
+            "description": "successful operation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Utilization"
+                }
+              }
+            }
+          },
+          "4XX": {
+            "$ref": "#/components/responses/Error"
+          },
+          "5XX": {
+            "$ref": "#/components/responses/Error"
+          }
+        }
+      }
+    },
     "/v1/vpc-firewall-rules": {
       "get": {
         "tags": [
@@ -13395,21 +13519,33 @@
       ]
     },
     "SiloQuotas": {
+      "description": "A collection of resource counts used to set the virtual capacity of a silo",
       "type": "object",
       "properties": {
         "cpus": {
+          "description": "Number of virtual CPUs",
           "type": "integer",
           "format": "int64"
         },
         "memory": {
-          "$ref": "#/components/schemas/ByteCount"
+          "description": "Amount of memory in bytes",
+          "allOf": [
+            {
+              "$ref": "#/components/schemas/ByteCount"
+            }
+          ]
         },
         "silo_id": {
           "type": "string",
           "format": "uuid"
         },
         "storage": {
-          "$ref": "#/components/schemas/ByteCount"
+          "description": "Amount of disk storage in bytes",
+          "allOf": [
+            {
+              "$ref": "#/components/schemas/ByteCount"
+            }
+          ]
         }
       },
       "required": [
@@ -13568,6 +13704,62 @@
         "role_name"
       ]
     },
+    "SiloUtilization": {
"description": "View of a silo's resource utilization and capacity", + "type": "object", + "properties": { + "allocated": { + "description": "Accounts for the total amount of resources reserved for silos via their quotas", + "allOf": [ + { + "$ref": "#/components/schemas/VirtualResourceCounts" + } + ] + }, + "provisioned": { + "description": "Accounts for resources allocated by in silos like CPU or memory for running instances and storage for disks and snapshots Note that CPU and memory resources associated with a stopped instances are not counted here", + "allOf": [ + { + "$ref": "#/components/schemas/VirtualResourceCounts" + } + ] + }, + "silo_id": { + "type": "string", + "format": "uuid" + }, + "silo_name": { + "$ref": "#/components/schemas/Name" + } + }, + "required": [ + "allocated", + "provisioned", + "silo_id", + "silo_name" + ] + }, + "SiloUtilizationResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/SiloUtilization" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "Sled": { "description": "An operator's view of a Sled.", "type": "object", @@ -14891,6 +15083,64 @@ "username" ] }, + "Utilization": { + "description": "View of the current silo's resource utilization and capacity", + "type": "object", + "properties": { + "capacity": { + "description": "The total amount of resources that can be provisioned in this silo Actions that would exceed this limit will fail", + "allOf": [ + { + "$ref": "#/components/schemas/VirtualResourceCounts" + } + ] + }, + "provisioned": { + "description": "Accounts for resources allocated to running instances or storage allocated via disks or snapshots Note that CPU and memory resources associated with a stopped instances are not counted here whereas associated disks will still be counted", + "allOf": [ + { + "$ref": "#/components/schemas/VirtualResourceCounts" + } + ] + } + }, + "required": [ + "capacity", + "provisioned" + ] + }, + "VirtualResourceCounts": { + "description": "A collection of resource counts used to describe capacity and utilization", + "type": "object", + "properties": { + "cpus": { + "description": "Number of virtual CPUs", + "type": "integer", + "format": "int64" + }, + "memory": { + "description": "Amount of memory in bytes", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "storage": { + "description": "Amount of disk storage in bytes", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + } + }, + "required": [ + "cpus", + "memory", + "storage" + ] + }, "Vpc": { "description": "View of a VPC", "type": "object", diff --git a/schema/crdb/21.0.0/up01.sql b/schema/crdb/21.0.0/up01.sql new file mode 100644 index 0000000000..0aaedc1862 --- /dev/null +++ b/schema/crdb/21.0.0/up01.sql @@ -0,0 +1,17 @@ +CREATE VIEW IF NOT EXISTS omicron.public.silo_utilization AS +SELECT + c.id AS silo_id, + s.name AS silo_name, + c.cpus_provisioned AS cpus_provisioned, + c.ram_provisioned AS memory_provisioned, + c.virtual_disk_bytes_provisioned AS storage_provisioned, + q.cpus AS cpus_allocated, + q.memory_bytes AS memory_allocated, + q.storage_bytes AS storage_allocated +FROM + omicron.public.virtual_provisioning_collection AS c + RIGHT JOIN omicron.public.silo_quotas AS q ON c.id = q.silo_id + INNER 
JOIN omicron.public.silo AS s ON c.id = s.id +WHERE + c.collection_type = 'Silo' + AND s.time_deleted IS NULL; \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index be7291b4e4..cc61148048 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -836,6 +836,31 @@ CREATE TABLE IF NOT EXISTS omicron.public.silo_quotas ( storage_bytes INT8 NOT NULL ); +/** + * A view of the amount of provisioned and allocated (set by quotas) resources + * on a given silo. + */ +CREATE VIEW IF NOT EXISTS omicron.public.silo_utilization +AS SELECT + c.id AS silo_id, + s.name AS silo_name, + c.cpus_provisioned AS cpus_provisioned, + c.ram_provisioned AS memory_provisioned, + c.virtual_disk_bytes_provisioned AS storage_provisioned, + q.cpus AS cpus_allocated, + q.memory_bytes AS memory_allocated, + q.storage_bytes AS storage_allocated +FROM + omicron.public.virtual_provisioning_collection AS c + RIGHT JOIN omicron.public.silo_quotas AS q + ON c.id = q.silo_id + INNER JOIN omicron.public.silo AS s + ON c.id = s.id +WHERE + c.collection_type = 'Silo' +AND + s.time_deleted IS NULL; + /* * Projects */ @@ -3071,7 +3096,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '20.0.0', NULL) + ( TRUE, NOW(), NOW(), '21.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/test-utils/src/dev/test_cmds.rs b/test-utils/src/dev/test_cmds.rs index f8fc6b1d27..15c94554c8 100644 --- a/test-utils/src/dev/test_cmds.rs +++ b/test-utils/src/dev/test_cmds.rs @@ -39,7 +39,7 @@ pub fn path_to_executable(cmd_name: &str) -> PathBuf { #[track_caller] pub fn assert_exit_code(exit_status: ExitStatus, code: u32, stderr_text: &str) { if let ExitStatus::Exited(exit_code) = exit_status { - assert_eq!(exit_code, code); + assert_eq!(exit_code, code, "stderr:\n{}", stderr_text); } else { panic!( "expected normal process exit with code {}, got {:?}\n\nprocess stderr:{}", From b1ebae8ab2cb7e636f04d847e0ac77aba891ff30 Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Fri, 15 Dec 2023 01:23:48 +0000 Subject: [PATCH 105/186] Update dendrite to 1c2f91a (#4513) Updated dendrite to commit 1c2f91a. --------- Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index 8516a50e65..1eca2004f8 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "45e05b2a90203d84510e0c8e902d9449b09ffd9b" -source.sha256 = "b14e73c8091a004472f9825b9b81b2c685bc5a48801704380a80481499060ad9" +source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" +source.sha256 = "052d97370515189465e4e835edb4a2d7e1e0b55ace0230ba18f045a03d975e80" output.type = "zone" output.intermediate_only = true @@ -501,8 +501,8 @@ only_for_targets.image = "standard" # 2. 
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "45e05b2a90203d84510e0c8e902d9449b09ffd9b"
-source.sha256 = "06575bea6173d16f6d206b580956ae2cdc72c65df2eb2f40dac01468ab49e336"
+source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7"
+source.sha256 = "3ebc1ee37c4d7a0657a78abbaad2fe81570da88128505bfdc4ea47e3e05c6277"
 output.type = "zone"
 output.intermediate_only = true

@@ -519,8 +519,8 @@ only_for_targets.image = "standard"
 # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "45e05b2a90203d84510e0c8e902d9449b09ffd9b"
-source.sha256 = "db2a398426fe59bd911eed91a3db7731a7a4d57e31dd357d89828d04b0891e2a"
+source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7"
+source.sha256 = "18079b2ce1003facb476e28499f2e31ebe092510ecd6c685fa1a91f1a34f2dda"
 output.type = "zone"
 output.intermediate_only = true

diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version
index c2afe5ca87..6bda68c69d 100644
--- a/tools/dendrite_openapi_version
+++ b/tools/dendrite_openapi_version
@@ -1,2 +1,2 @@
-COMMIT="45e05b2a90203d84510e0c8e902d9449b09ffd9b"
+COMMIT="1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7"
 SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0"
diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums
index 2b4f0e7555..de183cb496 100644
--- a/tools/dendrite_stub_checksums
+++ b/tools/dendrite_stub_checksums
@@ -1,3 +1,3 @@
-CIDL_SHA256_ILLUMOS="b14e73c8091a004472f9825b9b81b2c685bc5a48801704380a80481499060ad9"
-CIDL_SHA256_LINUX_DPD="a0d92b5007826b119c68fdaef753e33b125740ec7b3e771bfa6b3aa8d9fcb8cc"
-CIDL_SHA256_LINUX_SWADM="13387460db5b57e6ffad6c0b8877af32cc6d53fecc4a1a0910143c0446d39a38"
+CIDL_SHA256_ILLUMOS="052d97370515189465e4e835edb4a2d7e1e0b55ace0230ba18f045a03d975e80"
+CIDL_SHA256_LINUX_DPD="5c8bc252818897bc552a039f2423eb668d99e19ef54374644412c7aca533f94e"
+CIDL_SHA256_LINUX_SWADM="9d549fc3ebaf392961404b50e802ccb5e81e41e779ecc46166d49e5fb44b524f"

From 137559c74ec190623c2f1d4c24e56f9735c98ee1 Mon Sep 17 00:00:00 2001
From: Kyle Simpson
Date: Fri, 15 Dec 2023 20:55:13 +0000
Subject: [PATCH 106/186] Fix FIP creation being able to access IP Pools in
 other silos (#4705)

As raised in Matrix, this backports a fix from #4261 where a new
floating IP can be allocated using the name or ID of an IP pool which is
bound to another silo.

---------

Co-authored-by: David Crespo
---
 .../src/db/datastore/external_ip.rs           | 42 +++++++----
 nexus/src/app/sagas/disk_create.rs            |  2 +-
 nexus/src/app/sagas/disk_delete.rs            |  2 +-
 nexus/src/app/sagas/snapshot_create.rs        |  2 +-
 nexus/test-utils/src/resource_helpers.rs      |  3 +-
 nexus/tests/integration_tests/external_ips.rs | 71 +++++++++++++++++--
 nexus/tests/integration_tests/instances.rs    |  2 +-
 7 files changed, 100 insertions(+), 24 deletions(-)

diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs
index ddf396f871..2adeebd819 100644
--- a/nexus/db-queries/src/db/datastore/external_ip.rs
+++ b/nexus/db-queries/src/db/datastore/external_ip.rs
@@ -147,22 +147,34 @@ impl DataStore {
     ) -> CreateResult<ExternalIp> {
         let ip_id = Uuid::new_v4();

-        let pool_id = match params.pool {
-            Some(NameOrId::Name(name)) => {
-                LookupPath::new(opctx, self)
-                    .ip_pool_name(&Name(name))
-                    .fetch_for(authz::Action::Read)
-                    .await?
-                    .1
-            }
-            Some(NameOrId::Id(id)) => {
-                LookupPath::new(opctx, self)
-                    .ip_pool_id(id)
-                    .fetch_for(authz::Action::Read)
-                    .await?
-                    .1
+        // See `allocate_instance_ephemeral_ip`: we're replicating
+        // its structure to prevent cross-silo pool access.
+        let pool_id = if let Some(name_or_id) = params.pool {
+            let (.., authz_pool, pool) = match name_or_id {
+                NameOrId::Name(name) => {
+                    LookupPath::new(opctx, self)
+                        .ip_pool_name(&Name(name))
+                        .fetch_for(authz::Action::CreateChild)
+                        .await?
+                }
+                NameOrId::Id(id) => {
+                    LookupPath::new(opctx, self)
+                        .ip_pool_id(id)
+                        .fetch_for(authz::Action::CreateChild)
+                        .await?
+                }
+            };
+
+            let authz_silo_id = opctx.authn.silo_required()?.id();
+            if let Some(pool_silo_id) = pool.silo_id {
+                if pool_silo_id != authz_silo_id {
+                    return Err(authz_pool.not_found());
+                }
             }
-            None => self.ip_pools_fetch_default(opctx).await?,
+
+            pool
+        } else {
+            self.ip_pools_fetch_default(opctx).await?
         }
         .id();

diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs
index 4883afaddc..ab62977746 100644
--- a/nexus/src/app/sagas/disk_create.rs
+++ b/nexus/src/app/sagas/disk_create.rs
@@ -854,7 +854,7 @@ pub(crate) mod test {
     const PROJECT_NAME: &str = "springfield-squidport";

     async fn create_org_and_project(client: &ClientTestContext) -> Uuid {
-        create_ip_pool(&client, "p0", None).await;
+        create_ip_pool(&client, "p0", None, None).await;
         let project = create_project(client, PROJECT_NAME).await;
         project.identity.id
     }
diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs
index 8f6d74da0a..f791d289db 100644
--- a/nexus/src/app/sagas/disk_delete.rs
+++ b/nexus/src/app/sagas/disk_delete.rs
@@ -202,7 +202,7 @@ pub(crate) mod test {
     const PROJECT_NAME: &str = "springfield-squidport";

     async fn create_org_and_project(client: &ClientTestContext) -> Uuid {
-        create_ip_pool(&client, "p0", None).await;
+        create_ip_pool(&client, "p0", None, None).await;
         let project = create_project(client, PROJECT_NAME).await;
         project.identity.id
     }
diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs
index 3b4dfc0043..c3fe6fc327 100644
--- a/nexus/src/app/sagas/snapshot_create.rs
+++ b/nexus/src/app/sagas/snapshot_create.rs
@@ -1786,7 +1786,7 @@ mod test {
     const INSTANCE_NAME: &str = "base-instance";

     async fn create_org_project_and_disk(client: &ClientTestContext) -> Uuid {
-        create_ip_pool(&client, "p0", None).await;
+        create_ip_pool(&client, "p0", None, None).await;
         create_project(client, PROJECT_NAME).await;
         create_disk(client, PROJECT_NAME, DISK_NAME).await.identity.id
     }
diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs
index 0527d99490..c72c7ad780 100644
--- a/nexus/test-utils/src/resource_helpers.rs
+++ b/nexus/test-utils/src/resource_helpers.rs
@@ -134,6 +134,7 @@ pub async fn create_ip_pool(
     client: &ClientTestContext,
     pool_name: &str,
     ip_range: Option<IpRange>,
+    silo: Option<Uuid>,
 ) -> (IpPool, IpPoolRange) {
     let pool = object_create(
         client,
@@ -143,7 +144,7 @@
                 name: pool_name.parse().unwrap(),
                 description: String::from("an ip pool"),
             },
-            silo: None,
+            silo: silo.map(|id| NameOrId::Id(id)),
             is_default: false,
         },
     )
diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs
index f3161dea72..daec8e2064 100644
--- a/nexus/tests/integration_tests/external_ips.rs
+++ b/nexus/tests/integration_tests/external_ips.rs
@@ -19,14 +19,17 @@ use nexus_test_utils::resource_helpers::create_floating_ip;
 use nexus_test_utils::resource_helpers::create_instance_with;
 use nexus_test_utils::resource_helpers::create_ip_pool;
 use nexus_test_utils::resource_helpers::create_project;
+use nexus_test_utils::resource_helpers::create_silo;
 use nexus_test_utils::resource_helpers::populate_ip_pool;
 use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::params;
+use nexus_types::external_api::shared;
 use nexus_types::external_api::views::FloatingIp;
 use omicron_common::address::IpRange;
 use omicron_common::address::Ipv4Range;
 use omicron_common::api::external::IdentityMetadataCreateParams;
 use omicron_common::api::external::Instance;
+use omicron_common::api::external::NameOrId;
 use uuid::Uuid;

 type ControlPlaneTestContext =
@@ -34,7 +37,8 @@

 const PROJECT_NAME: &str = "rootbeer-float";

-const FIP_NAMES: &[&str] = &["vanilla", "chocolate", "strawberry", "pistachio"];
+const FIP_NAMES: &[&str] =
+    &["vanilla", "chocolate", "strawberry", "pistachio", "caramel"];

 pub fn get_floating_ips_url(project_name: &str) -> String {
     format!("/v1/floating-ips?project={project_name}")
@@ -107,7 +111,7 @@ async fn test_floating_ip_create(cptestctx: &ControlPlaneTestContext) {
         Ipv4Range::new(Ipv4Addr::new(10, 1, 0, 1), Ipv4Addr::new(10, 1, 0, 5))
             .unwrap(),
     );
-    create_ip_pool(&client, "other-pool", Some(other_pool_range)).await;
+    create_ip_pool(&client, "other-pool", Some(other_pool_range), None).await;

     let project = create_project(client, PROJECT_NAME).await;

@@ -142,7 +146,7 @@
     assert_eq!(fip.instance_id, None);
     assert_eq!(fip.ip, ip_addr);

-    // Create with no chosen IP from named pool.
+    // Create with no chosen IP from fleet-scoped named pool.
     let fip_name = FIP_NAMES[2];
     let fip = create_floating_ip(
         client,
@@ -157,7 +161,7 @@
     assert_eq!(fip.instance_id, None);
     assert_eq!(fip.ip, IpAddr::from(Ipv4Addr::new(10, 1, 0, 1)));

-    // Create with chosen IP from named pool.
+    // Create with chosen IP from fleet-scoped named pool.
     let fip_name = FIP_NAMES[3];
     let ip_addr = "10.1.0.5".parse().unwrap();
     let fip = create_floating_ip(
@@ -174,6 +178,65 @@
     assert_eq!(fip.ip, ip_addr);
 }

+#[nexus_test]
+async fn test_floating_ip_create_fails_in_other_silo_pool(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+
+    populate_ip_pool(&client, "default", None).await;
+
+    let project = create_project(client, PROJECT_NAME).await;
+
+    // Create other silo and pool linked to that silo
+    let other_silo = create_silo(
+        &client,
+        "not-my-silo",
+        true,
+        shared::SiloIdentityMode::SamlJit,
+    )
+    .await;
+    let other_pool_range = IpRange::V4(
+        Ipv4Range::new(Ipv4Addr::new(10, 2, 0, 1), Ipv4Addr::new(10, 2, 0, 5))
+            .unwrap(),
+    );
+    create_ip_pool(
+        &client,
+        "external-silo-pool",
+        Some(other_pool_range),
+        Some(other_silo.identity.id),
+    )
+    .await;
+
+    let fip_name = FIP_NAMES[4];
+
+    // creating a floating IP should fail with a 404 as if the specified pool
+    // does not exist
+    let url =
+        format!("/v1/floating-ips?project={}", project.identity.name.as_str());
+    let body = params::FloatingIpCreate {
+        identity: IdentityMetadataCreateParams {
+            name: fip_name.parse().unwrap(),
+            description: String::from("a floating ip"),
+        },
+        address: None,
+        pool: Some(NameOrId::Name("external-silo-pool".parse().unwrap())),
+    };
+
+    let error = NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, &url)
+            .body(Some(&body))
+            .expect_status(Some(StatusCode::NOT_FOUND)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute_and_parse_unwrap::<HttpErrorResponseBody>()
+    .await;
+    assert_eq!(
+        error.message,
+        "not found: ip-pool with name \"external-silo-pool\""
+    );
+}
+
 #[nexus_test]
 async fn test_floating_ip_create_ip_in_use(
     cptestctx: &ControlPlaneTestContext,
diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs
index 19b507f5bb..4acc918333 100644
--- a/nexus/tests/integration_tests/instances.rs
+++ b/nexus/tests/integration_tests/instances.rs
@@ -3563,7 +3563,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool(
             .unwrap(),
     );
     populate_ip_pool(&client, "default", Some(default_pool_range)).await;
-    create_ip_pool(&client, "other-pool", Some(other_pool_range)).await;
+    create_ip_pool(&client, "other-pool", Some(other_pool_range), None).await;

     // Create an instance with pool name blank, expect IP from default pool
     create_instance_with_pool(client, "default-pool-inst", None).await;

From 2195b56ae6eb822e80d70a9b33cb946c1ea47063 Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Fri, 15 Dec 2023 14:55:56 -0600
Subject: [PATCH 107/186] Bump web console (#4706)

https://github.com/oxidecomputer/console/compare/007cfe67...ad2ea54a

* [ad2ea54a](https://github.com/oxidecomputer/console/commit/ad2ea54a) oxidecomputer/console#1844

---
 tools/console_version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/console_version b/tools/console_version
index e62ecf2bf6..1b2d1b273a 100644
--- a/tools/console_version
+++ b/tools/console_version
@@ -1,2 +1,2 @@
-COMMIT="007cfe672aa7e7c791591be089bf2a2386d2c34f"
-SHA2="a4f4264229724304ee383ba55d426acd1e5d713417cf1e77fed791c3a7162abf"
+COMMIT="ad2ea54a27615e21a4993fbeff3fd83fbc2098a4"
+SHA2="20c62ec121948fd0794b6e1f0326d3d8e701e4a3872b18e7d4752e92b614d185"

From 83e01a51264497f780acc0f89fec65d70fc1aeca Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 15 Dec 2023 15:03:05 -0800
Subject: [PATCH 108/186] Bump zerocopy from 0.7.26 to 0.7.31 (#4702)

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock                | 16 ++++++++--------
 workspace-hack/Cargo.toml |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e9e5c1594d..121e31550f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -62,7 +62,7 @@ dependencies = [
  "getrandom 0.2.10",
  "once_cell",
  "version_check",
- "zerocopy 0.7.26",
+ "zerocopy 0.7.31",
 ]

[[package]]
@@ -5178,7 +5178,7 @@ dependencies = [
  "usdt",
  "uuid",
  "yasna",
- "zerocopy 0.7.26",
+ "zerocopy 0.7.31",
  "zeroize",
  "zip",
 ]

@@ -5405,7 +5405,7 @@ dependencies = [
  "opte",
  "serde",
  "smoltcp 0.10.0",
- "zerocopy 0.7.26",
+ "zerocopy 0.7.31",
 ]

[[package]]
@@ -10191,12 +10191,12 @@ dependencies = [

 [[package]]
 name = "zerocopy"
-version = "0.7.26"
+version = "0.7.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0"
+checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d"
 dependencies = [
  "byteorder",
- "zerocopy-derive 0.7.26",
+ "zerocopy-derive 0.7.31",
 ]

[[package]]
@@ -10223,9 +10223,9 @@ dependencies = [

 [[package]]
 name = "zerocopy-derive"
-version = "0.7.26"
+version = "0.7.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f"
+checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 01cd1bdb68..8998f7594b 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -112,7 +112,7 @@ unicode-normalization = { version = "0.1.22" }
 usdt = { version = "0.3.5" }
 uuid = { version = "1.6.1", features = ["serde", "v4"] }
 yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] }
-zerocopy = { version = "0.7.26", features = ["derive", "simd"] }
+zerocopy = { version = "0.7.31", features = ["derive", "simd"] }
 zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] }
 zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] }

@@ -216,7 +216,7 @@ unicode-normalization = { version = "0.1.22" }
 usdt = { version = "0.3.5" }
 uuid = { version = "1.6.1", features = ["serde", "v4"] }
 yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] }
-zerocopy = { version = "0.7.26", features = ["derive", "simd"] }
+zerocopy = { version = "0.7.31", features = ["derive", "simd"] }
 zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] }
 zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] }

From 4ae84726c2d2382fa643b29e04585727e204148f Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Sat, 16 Dec 2023 22:14:12 -0600
Subject: [PATCH 109/186] Bump web console (utilization rounding fix) (#4710)

https://github.com/oxidecomputer/console/compare/ad2ea54a...02c6ce74

* [02c6ce74](https://github.com/oxidecomputer/console/commit/02c6ce74) oxidecomputer/console#1865

---
 tools/console_version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/console_version b/tools/console_version
index 1b2d1b273a..785c535e8d 100644
--- a/tools/console_version
+++ b/tools/console_version
@@ -1,2 +1,2 @@
-COMMIT="ad2ea54a27615e21a4993fbeff3fd83fbc2098a4"
-SHA2="20c62ec121948fd0794b6e1f0326d3d8e701e4a3872b18e7d4752e92b614d185" +COMMIT="02c6ce747fd5dd05e9d454ecb1bf70392c9d954e" +SHA2="39fd191993e147a569e28df86414e3d0f33963b7675474d7c522c3f685d4d4f0" From d5fb85eb605bc04244185eef93cc10f09b50e712 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Mon, 18 Dec 2023 11:15:15 -0600 Subject: [PATCH 110/186] [trivial] Move `SCHEMA_VERSION` to the top of `schema.rs` (#4703) Digging for this in the middle of the file when I want to bump the version drives me nuts. Will obviously hold until after release. --- nexus/db-model/src/schema.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 6839af8a76..7f4bf51487 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -8,6 +8,13 @@ use omicron_common::api::external::SemverVersion; +/// The version of the database schema this particular version of Nexus was +/// built against. +/// +/// This should be updated whenever the schema is changed. For more details, +/// refer to: schema/crdb/README.adoc +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(21, 0, 0); + table! { disk (id) { id -> Uuid, @@ -1341,13 +1348,6 @@ table! { } } -/// The version of the database schema this particular version of Nexus was -/// built against. -/// -/// This should be updated whenever the schema is changed. For more details, -/// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(21, 0, 0); - allow_tables_to_appear_in_same_query!( system_update, component_update, From 1032885c94d1b8581796b26cb4a254b84adaf5e1 Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Mon, 18 Dec 2023 18:16:27 +0000 Subject: [PATCH 111/186] Configure bash environment for NGZ root. (#4712) This removes the special case for the switch zone and deploys a consistent profile and bashrc to all non-global zones. While understanding this is subjective, improvements here are: * A switch zone prompt includes the local switch number (0/1) if it can be determined; * PATH is configured to include additional directories useful within the zone; * The hostname part of the prompt is truncated in zones which have UUIDs as part of their name; * Coloured prompt, as per the GZ. Pretty much everyone has their own preferred prompt format, so consensus is unlikely here, but this is a step forward in having consistency and a better PATH. In the limit, nobody will be logging into these zones outside of a development environment anyway. --- package-manifest.toml | 14 +++++++++- smf/profile/bashrc | 42 ++++++++++++++++++++++++++++++ smf/profile/profile | 24 +++++++++++++++++ smf/switch_zone_setup/root.profile | 3 --- 4 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 smf/profile/bashrc create mode 100644 smf/profile/profile delete mode 100644 smf/switch_zone_setup/root.profile diff --git a/package-manifest.toml b/package-manifest.toml index 1eca2004f8..6bd40c320d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -69,6 +69,7 @@ service_name = "overlay" source.type = "composite" source.packages = [ "logadm.tar.gz", + "profile.tar.gz", ] output.type = "zone" @@ -83,6 +84,18 @@ source.paths = [ ] output.type = "zone" output.intermediate_only = true +# +# The profile package is an overlay for all non-global zones to configure +# root's bash environment. 
+[package.profile]
+service_name = "profile"
+source.type = "local"
+source.paths = [
+  { from = "smf/profile/profile", to = "/root/.profile" },
+  { from = "smf/profile/bashrc", to = "/root/.bashrc" },
+]
+output.type = "zone"
+output.intermediate_only = true

 [package.omicron-nexus]
 service_name = "nexus"
@@ -335,7 +348,6 @@ source.paths = [
   { from = "smf/switch_zone_setup/manifest.xml", to = "/var/svc/manifest/site/switch_zone_setup/manifest.xml" },
   { from = "smf/switch_zone_setup/switch_zone_setup", to = "/opt/oxide/bin/switch_zone_setup" },
   { from = "smf/switch_zone_setup/support_authorized_keys", to = "/opt/oxide/support/authorized_keys" },
-  { from = "smf/switch_zone_setup/root.profile", to = "/root/.profile" },
 ]
 output.type = "zone"
 output.intermediate_only = true
diff --git a/smf/profile/bashrc b/smf/profile/bashrc
new file mode 100644
index 0000000000..d19e41e5f7
--- /dev/null
+++ b/smf/profile/bashrc
@@ -0,0 +1,42 @@
+
+C_RED='\[\033[01;31m\]'
+C_GREEN='\[\033[01;32m\]'
+C_CYAN='\[\033[01;36m\]'
+C_BLD='\[\033[1m\]'
+C_NUL='\[\033[00m\]'
+
+typeset _hst="$HOSTNAME"
+typeset _hstc="$C_RED$HOSTNAME"
+case "$_hst" in
+    oxz_switch)
+        # Try to determine which switch zone we are
+        _switchid=$(curl -s http://localhost:12225/local/switch-id \
+            | /opt/ooce/bin/jq -r .slot)
+        if (( $? == 0 )) && [[ -n "$_switchid" ]]; then
+            _hst+="$_switchid"
+            _hstc+="$C_CYAN$_switchid"
+        fi
+        ;;
+    oxz_*-*)
+        # Shorten the hostname by truncating the UUID so that the prompt
+        # doesn't take up an excessive amount of width
+        _hst="${HOSTNAME%%-*}"
+        _hstc="$C_RED${HOSTNAME%%-*}"
+        ;;
+esac
+
+if [[ -n $SSH_CLIENT ]]; then
+    echo -ne "\033]0;${_hst} \007"
+    export PROMPT_COMMAND='history -a'
+fi
+
+case "$TERM" in
+xterm*|rxvt*|screen*|sun-color)
+    PS1="$C_GREEN\\u$C_NUL@$_hstc$C_NUL:$C_RED\\w$C_NUL$C_BLD\\\$$C_NUL "
+    ;;
+*)
+    PS1="\\u@$_hst:\\w\\$ "
+esac
+
+export PS1
+
diff --git a/smf/profile/profile b/smf/profile/profile
new file mode 100644
index 0000000000..8f613d4d56
--- /dev/null
+++ b/smf/profile/profile
@@ -0,0 +1,24 @@
+
+PATH+=:/opt/ooce/bin
+
+case "$HOSTNAME" in
+    oxz_switch)
+        # Add tools like xcvradm, swadm & ddmadm to the PATH by default
+        PATH+=:/opt/oxide/bin:/opt/oxide/dendrite/bin:/opt/oxide/mg-ddm/bin
+        ;;
+    oxz_cockroachdb*)
+        PATH+=:/opt/oxide/cockroachdb/bin
+        ;;
+    oxz_crucible*)
+        PATH+=:/opt/oxide/crucible/bin
+        ;;
+    oxz_clickhouse*)
+        PATH+=:/opt/oxide/clickhouse
+        ;;
+    oxz_external_dns*|oxz_internal_dns*)
+        PATH+=:/opt/oxide/dns-server/bin
+        ;;
+esac
+
+[ -f ~/.bashrc ] && . ~/.bashrc
+
diff --git a/smf/switch_zone_setup/root.profile b/smf/switch_zone_setup/root.profile
deleted file mode 100644
index b62b9e5403..0000000000
--- a/smf/switch_zone_setup/root.profile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Add tools like xcvradm, swadm & ddmadm to the PATH by default
-export PATH=$PATH:/opt/oxide/bin:/opt/oxide/dendrite/bin:/opt/oxide/mg-ddm/bin
-

From 6d3b8be167f2ef328e2976586de7006bb4ddbe03 Mon Sep 17 00:00:00 2001
From: Greg Colombo
Date: Mon, 18 Dec 2023 11:28:07 -0800
Subject: [PATCH 112/186] Move instances to Failed if sled agent returns an
 "unhealthy" error type from calls to stop/reboot (#4711)

Restore `instance_reboot` and `instance_stop` to their prior behavior:
if these routines try to contact sled agent and get back a server
error, mark the instance as unhealthy and move it to the Failed state.

Also use `#[source]` instead of message interpolation in
`InstanceStateChangeError::SledAgent` (a minimal sketch of this pattern
follows).
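
For readers unfamiliar with the distinction, here is a minimal sketch
of the two `thiserror` styles involved; the enum and variant names
below are made up for the example, and the real type is
`InstanceStateChangeError` in `nexus/src/app/instance.rs`:

    use thiserror::Error;

    #[derive(Debug, Error)]
    enum StateChangeError {
        // Interpolation: the cause is flattened into the display
        // string, so `Error::source()` returns `None` and chain-aware
        // loggers cannot walk the underlying error.
        #[error("sled agent client error: {0}")]
        Interpolated(String),

        // `#[source]`: the display string stays short and the
        // underlying error is preserved via `source()`, which is what
        // error-chain-aware loggers render.
        #[error("sled agent client error")]
        Sourced(#[source] std::io::Error),
    }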
This restores the status quo ante from #4682 in anticipation of
reaching a better overall mechanism for dealing with failures to
communicate about instances with sled agents. See #3206, #3238, and
#4226 for more discussion.

Tests: new integration test; stood up a dev cluster, started an
instance, killed the zone with `zoneadm halt`, and verified that calls
to reboot/stop the instance eventually marked it as Failed (due to a
timeout attempting to contact the Propolis zone).

Fixes #4709.

---
 nexus/src/app/instance.rs                  | 71 ++++++++++++-----
 nexus/tests/integration_tests/instances.rs | 80 ++++++++++++++++++++++
 2 files changed, 132 insertions(+), 19 deletions(-)

diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs
index 93386a66d0..4045269878 100644
--- a/nexus/src/app/instance.rs
+++ b/nexus/src/app/instance.rs
@@ -65,7 +65,7 @@ type SledAgentClientError =
     sled_agent_client::Error<sled_agent_client::types::Error>;

 // Newtype wrapper to avoid the orphan type rule.
-#[derive(Debug)]
+#[derive(Debug, thiserror::Error)]
 pub struct SledAgentInstancePutError(pub SledAgentClientError);

 impl std::fmt::Display for SledAgentInstancePutError {
@@ -117,8 +117,8 @@
 #[derive(Debug, thiserror::Error)]
 pub enum InstanceStateChangeError {
     /// Sled agent returned an error from one of its instance endpoints.
-    #[error("sled agent client error: {0}")]
-    SledAgent(SledAgentInstancePutError),
+    #[error("sled agent client error")]
+    SledAgent(#[source] SledAgentInstancePutError),

     /// Some other error occurred outside of the attempt to communicate
     /// with sled agent.
@@ -624,14 +624,31 @@
             .instance_fetch_with_vmm(opctx, &authz_instance)
             .await?;

-        self.instance_request_state(
-            opctx,
-            &authz_instance,
-            state.instance(),
-            state.vmm(),
-            InstanceStateChangeRequest::Reboot,
-        )
-        .await?;
+        if let Err(e) = self
+            .instance_request_state(
+                opctx,
+                &authz_instance,
+                state.instance(),
+                state.vmm(),
+                InstanceStateChangeRequest::Reboot,
+            )
+            .await
+        {
+            if let InstanceStateChangeError::SledAgent(inner) = &e {
+                if inner.instance_unhealthy() {
+                    let _ = self
+                        .mark_instance_failed(
+                            &authz_instance.id(),
+                            state.instance().runtime(),
+                            inner,
+                        )
+                        .await;
+                }
+            }
+
+            return Err(e.into());
+        }
+
         self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await
     }

@@ -711,14 +728,30 @@
             .instance_fetch_with_vmm(opctx, &authz_instance)
             .await?;

-        self.instance_request_state(
-            opctx,
-            &authz_instance,
-            state.instance(),
-            state.vmm(),
-            InstanceStateChangeRequest::Stop,
-        )
-        .await?;
+        if let Err(e) = self
+            .instance_request_state(
+                opctx,
+                &authz_instance,
+                state.instance(),
+                state.vmm(),
+                InstanceStateChangeRequest::Stop,
+            )
+            .await
+        {
+            if let InstanceStateChangeError::SledAgent(inner) = &e {
+                if inner.instance_unhealthy() {
+                    let _ = self
+                        .mark_instance_failed(
+                            &authz_instance.id(),
+                            state.instance().runtime(),
+                            inner,
+                        )
+                        .await;
+                }
+            }
+
+            return Err(e.into());
+        }

         self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await
     }
diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs
index 4acc918333..44b65fa67b 100644
--- a/nexus/tests/integration_tests/instances.rs
+++ b/nexus/tests/integration_tests/instances.rs
@@ -867,6 +867,86 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) {
     }
 }

+// Verifies that if a request to reboot or stop an instance fails because of a
+// 500-level error from sled agent, then the instance moves to the Failed state.
+#[nexus_test]
+async fn test_instance_failed_after_sled_agent_error(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+    let apictx = &cptestctx.server.apictx();
+    let nexus = &apictx.nexus;
+    let instance_name = "losing-is-fun";
+
+    // Create and start the test instance.
+    create_org_and_project(&client).await;
+    let instance_url = get_instance_url(instance_name);
+    let instance = create_instance(client, PROJECT_NAME, instance_name).await;
+    instance_simulate(nexus, &instance.identity.id).await;
+    let instance_next = instance_get(&client, &instance_url).await;
+    assert_eq!(instance_next.runtime.run_state, InstanceState::Running);
+
+    let sled_agent = &cptestctx.sled_agent.sled_agent;
+    sled_agent
+        .set_instance_ensure_state_error(Some(
+            omicron_common::api::external::Error::internal_error(
+                "injected by test_instance_failed_after_sled_agent_error",
+            ),
+        ))
+        .await;
+
+    let url = get_instance_url(format!("{}/reboot", instance_name).as_str());
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, &url)
+            .body(None as Option<&serde_json::Value>),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body::<dropshot::HttpErrorResponseBody>()
+    .expect_err("expected injected failure");
+
+    let instance_next = instance_get(&client, &instance_url).await;
+    assert_eq!(instance_next.runtime.run_state, InstanceState::Failed);
+
+    NexusRequest::object_delete(client, &get_instance_url(instance_name))
+        .authn_as(AuthnMode::PrivilegedUser)
+        .execute()
+        .await
+        .unwrap();
+
+    sled_agent.set_instance_ensure_state_error(None).await;
+
+    let instance = create_instance(client, PROJECT_NAME, instance_name).await;
+    instance_simulate(nexus, &instance.identity.id).await;
+    let instance_next = instance_get(&client, &instance_url).await;
+    assert_eq!(instance_next.runtime.run_state, InstanceState::Running);
+
+    sled_agent
+        .set_instance_ensure_state_error(Some(
+            omicron_common::api::external::Error::internal_error(
+                "injected by test_instance_failed_after_sled_agent_error",
+            ),
+        ))
+        .await;
+
+    let url = get_instance_url(format!("{}/stop", instance_name).as_str());
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, &url)
+            .body(None as Option<&serde_json::Value>),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body::<dropshot::HttpErrorResponseBody>()
+    .expect_err("expected injected failure");
+
+    let instance_next = instance_get(&client, &instance_url).await;
+    assert_eq!(instance_next.runtime.run_state, InstanceState::Failed);
+}
+
 /// Assert values for fleet, silo, and project using both system and silo
 /// metrics endpoints
 async fn assert_metrics(

From 8ad838eac6b972134f3689c728126eb0f6311ffb Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Stone" Date: Mon, 18 Dec 2023 16:20:31 -0500 Subject: [PATCH 113/186] Cleanup list uninitialized sled API (#4698) * Rename `uninitialized_sled_list` to `sled_list_uninitialized` * Rename endpoint from `v1/system/hardware/uninitialized-sleds` to `v1/system/hardware/sleds-uninitialized` * Add fake pagination to this API endpoint Fixes part of #4607 --- nexus/src/app/rack.rs | 2 +- nexus/src/external_api/http_entrypoints.rs | 12 ++-- nexus/tests/integration_tests/endpoints.rs | 2 +- nexus/tests/integration_tests/rack.rs | 15 ++-- nexus/tests/output/nexus_tags.txt | 2 +- openapi/nexus.json | 79 +++++++++++++--------- 6 files changed, 66 insertions(+), 46 deletions(-) diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 168e9eeaa3..c0307e5b5b 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -866,7 +866,7 @@ impl super::Nexus { // // TODO-multirack: We currently limit sleds to a single rack and we also // retrieve the `rack_uuid` from the Nexus instance used. - pub(crate) async fn uninitialized_sled_list( + pub(crate) async fn sled_list_uninitialized( &self, opctx: &OpContext, ) -> ListResultVec { diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 042ee294b7..8a4aeaeff5 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -227,7 +227,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(physical_disk_list)?; api.register(switch_list)?; api.register(switch_view)?; - api.register(uninitialized_sled_list)?; + api.register(sled_list_uninitialized)?; api.register(add_sled_to_initialized_rack)?; api.register(user_builtin_list)?; @@ -4654,18 +4654,18 @@ async fn rack_view( /// List uninitialized sleds in a given rack #[endpoint { method = GET, - path = "/v1/system/hardware/uninitialized-sleds", + path = "/v1/system/hardware/sleds-uninitialized", tags = ["system/hardware"] }] -async fn uninitialized_sled_list( +async fn sled_list_uninitialized( rqctx: RequestContext>, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let sleds = nexus.uninitialized_sled_list(&opctx).await?; - Ok(HttpResponseOk(sleds)) + let sleds = nexus.sled_list_uninitialized(&opctx).await?; + Ok(HttpResponseOk(ResultsPage { items: sleds, next_page: None })) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index c41fcdbed9..545129d567 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -47,7 +47,7 @@ lazy_static! { pub static ref HARDWARE_RACK_URL: String = format!("/v1/system/hardware/racks/{}", RACK_UUID); pub static ref HARDWARE_UNINITIALIZED_SLEDS: String = - format!("/v1/system/hardware/uninitialized-sleds"); + format!("/v1/system/hardware/sleds-uninitialized"); pub static ref HARDWARE_SLED_URL: String = format!("/v1/system/hardware/sleds/{}", SLED_AGENT_UUID); pub static ref HARDWARE_SLED_PROVISION_STATE_URL: String = diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index 9f77223871..a6fc93e92a 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -2,6 +2,7 @@ // License, v. 2.0. 
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.

+use dropshot::ResultsPage;
 use http::Method;
 use http::StatusCode;
 use nexus_test_utils::http_testing::AuthnMode;
@@ -85,18 +86,19 @@ async fn test_rack_initialization(cptestctx: &ControlPlaneTestContext) {
 }

 #[nexus_test]
-async fn test_uninitialized_sled_list(cptestctx: &ControlPlaneTestContext) {
+async fn test_sled_list_uninitialized(cptestctx: &ControlPlaneTestContext) {
     let internal_client = &cptestctx.internal_client;
     let external_client = &cptestctx.external_client;
-    let list_url = "/v1/system/hardware/uninitialized-sleds";
+    let list_url = "/v1/system/hardware/sleds-uninitialized";
     let mut uninitialized_sleds =
         NexusRequest::object_get(external_client, &list_url)
            .authn_as(AuthnMode::PrivilegedUser)
            .execute()
            .await
            .expect("failed to get uninitialized sleds")
-            .parsed_body::<Vec<UninitializedSled>>()
-            .unwrap();
+            .parsed_body::<ResultsPage<UninitializedSled>>()
+            .unwrap()
+            .items;
     debug!(cptestctx.logctx.log, "{:#?}", uninitialized_sleds);

     // There are currently two fake sim gimlets created in the latest inventory
@@ -137,8 +139,9 @@ async fn test_sled_list_uninitialized(cptestctx: &ControlPlaneTestContext) {
            .execute()
            .await
            .expect("failed to get uninitialized sleds")
-            .parsed_body::<Vec<UninitializedSled>>()
-            .unwrap();
+            .parsed_body::<ResultsPage<UninitializedSled>>()
+            .unwrap()
+            .items;
     debug!(cptestctx.logctx.log, "{:#?}", uninitialized_sleds);
     assert_eq!(1, uninitialized_sleds_2.len());
     assert_eq!(uninitialized_sleds, uninitialized_sleds_2);
diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt
index 7e1dc306d5..10e7df7286 100644
--- a/nexus/tests/output/nexus_tags.txt
+++ b/nexus/tests/output/nexus_tags.txt
@@ -126,12 +126,12 @@ rack_list                 GET      /v1/system/hardware/racks
 rack_view                 GET      /v1/system/hardware/racks/{rack_id}
 sled_instance_list        GET      /v1/system/hardware/sleds/{sled_id}/instances
 sled_list                 GET      /v1/system/hardware/sleds
+sled_list_uninitialized   GET      /v1/system/hardware/sleds-uninitialized
 sled_physical_disk_list   GET      /v1/system/hardware/sleds/{sled_id}/disks
 sled_set_provision_state  PUT      /v1/system/hardware/sleds/{sled_id}/provision-state
 sled_view                 GET      /v1/system/hardware/sleds/{sled_id}
 switch_list               GET      /v1/system/hardware/switches
 switch_view               GET      /v1/system/hardware/switches/{switch_id}
-uninitialized_sled_list   GET      /v1/system/hardware/uninitialized-sleds

 API operations found with tag "system/metrics"
 OPERATION ID              METHOD   URL PATH
diff --git a/openapi/nexus.json b/openapi/nexus.json
index 2a18934718..4c89706a1c 100644
--- a/openapi/nexus.json
+++ b/openapi/nexus.json
@@ -4015,6 +4015,33 @@
       }
     }
   },
+    "/v1/system/hardware/sleds-uninitialized": {
+      "get": {
+        "tags": [
+          "system/hardware"
+        ],
+        "summary": "List uninitialized sleds in a given rack",
+        "operationId": "sled_list_uninitialized",
+        "responses": {
+          "200": {
+            "description": "successful operation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/UninitializedSledResultsPage"
+                }
+              }
+            }
+          },
+          "4XX": {
+            "$ref": "#/components/responses/Error"
+          },
+          "5XX": {
+            "$ref": "#/components/responses/Error"
+          }
+        }
+      }
+    },
     "/v1/system/hardware/switch-port": {
       "get": {
         "tags": [
@@ -4290,37 +4317,6 @@
       }
     }
   },
-    "/v1/system/hardware/uninitialized-sleds": {
-      "get": {
-        "tags": [
-          "system/hardware"
-        ],
-        "summary": "List uninitialized sleds in a given rack",
-        "operationId": "uninitialized_sled_list",
-        "responses": {
-          "200": {
-            "description": "successful operation",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "title": "Array_of_UninitializedSled",
"Array_of_UninitializedSled", - "type": "array", - "items": { - "$ref": "#/components/schemas/UninitializedSled" - } - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/v1/system/identity-providers": { "get": { "tags": [ @@ -14888,6 +14884,27 @@ "rack_id" ] }, + "UninitializedSledResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/UninitializedSled" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "User": { "description": "View of a User", "type": "object", From e4641722b3036e10e0528dcb1d208218ea337337 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 19 Dec 2023 09:50:41 -0800 Subject: [PATCH 114/186] [mgs] Use slog-error-chain to clean up error types and logging (#4717) These are all pretty mechanical changes: * Use `#[source]` and `#[from]` correctly * Add a bit of context to a few error cases (e.g., including the `SpIdentifier` when returning an `SpCommunicationFailed` error) * Use `InlineErrorChain` instead of `anyhow` to convert error chains into strings (avoiding the intermediate `anyhow::Error` heap allocation) * Switch to `Utf8PathBuf` for command line args and related errors --- Cargo.lock | 21 ++++ Cargo.toml | 1 + gateway/Cargo.toml | 2 + gateway/src/bin/mgs.rs | 8 +- gateway/src/config.rs | 53 +++------ gateway/src/error.rs | 148 ++++++++++++++---------- gateway/src/http_entrypoints.rs | 193 +++++++++++++++++++++---------- gateway/src/lib.rs | 12 +- gateway/src/management_switch.rs | 16 ++- gateway/src/serial_console.rs | 16 ++- 10 files changed, 291 insertions(+), 179 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 121e31550f..74c01d3411 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4698,6 +4698,7 @@ version = "0.1.0" dependencies = [ "anyhow", "base64", + "camino", "clap 4.4.3", "dropshot", "expectorate", @@ -4723,6 +4724,7 @@ dependencies = [ "signal-hook-tokio", "slog", "slog-dtrace", + "slog-error-chain", "sp-sim", "subprocess", "thiserror", @@ -7842,6 +7844,25 @@ dependencies = [ "slog-term", ] +[[package]] +name = "slog-error-chain" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f69041f45774602108e47fb25e705dc23acfb2" +dependencies = [ + "slog", + "slog-error-chain-derive", +] + +[[package]] +name = "slog-error-chain-derive" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f69041f45774602108e47fb25e705dc23acfb2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.32", +] + [[package]] name = "slog-json" version = "2.6.1" diff --git a/Cargo.toml b/Cargo.toml index 841c7bb16b..ca134536f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -339,6 +339,7 @@ slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "relea slog-async = "2.8" slog-dtrace = "0.2" slog-envlogger = "2.2" +slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } slog-term = "2.9" smf = "0.2" snafu = "0.7" diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index 75c31e9977..f2e5f83a8a 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace 
 base64.workspace = true
+camino.workspace = true
 clap.workspace = true
 dropshot.workspace = true
 futures.workspace = true
@@ -25,6 +26,7 @@ signal-hook.workspace = true
 signal-hook-tokio.workspace = true
 slog.workspace = true
 slog-dtrace.workspace = true
+slog-error-chain.workspace = true
 thiserror.workspace = true
 tokio = { workspace = true, features = ["full"] }
 tokio-stream.workspace = true
diff --git a/gateway/src/bin/mgs.rs b/gateway/src/bin/mgs.rs
index 6917d4f174..39810ea06a 100644
--- a/gateway/src/bin/mgs.rs
+++ b/gateway/src/bin/mgs.rs
@@ -5,6 +5,7 @@
 //! Executable program to run gateway, the management gateway service

 use anyhow::{anyhow, Context};
+use camino::Utf8PathBuf;
 use clap::Parser;
 use futures::StreamExt;
 use omicron_common::cmd::{fatal, CmdError};
@@ -12,7 +13,6 @@ use omicron_gateway::{run_openapi, start_server, Config, MgsArguments};
 use signal_hook::consts::signal;
 use signal_hook_tokio::Signals;
 use std::net::SocketAddrV6;
-use std::path::PathBuf;
 use uuid::Uuid;

 #[derive(Debug, Parser)]
@@ -24,7 +24,7 @@ enum Args {
     /// Start an MGS server
     Run {
         #[clap(name = "CONFIG_FILE_PATH", action)]
-        config_file_path: PathBuf,
+        config_file_path: Utf8PathBuf,

         /// Read server ID and address(es) for dropshot server from our SMF
         /// properties (only valid when running as a service on illumos)
@@ -81,9 +81,7 @@ async fn do_run() -> Result<(), CmdError> {
             address,
         } => {
             let config = Config::from_file(&config_file_path)
-                .with_context(|| {
-                    format!("failed to parse {}", config_file_path.display())
-                })
+                .map_err(anyhow::Error::new)
                 .map_err(CmdError::Failure)?;

             let mut signals = Signals::new([signal::SIGUSR1])
diff --git a/gateway/src/config.rs b/gateway/src/config.rs
index adbd16c6a1..afdb046881 100644
--- a/gateway/src/config.rs
+++ b/gateway/src/config.rs
@@ -6,10 +6,11 @@
 //! configuration

 use crate::management_switch::SwitchConfig;
+use camino::Utf8Path;
+use camino::Utf8PathBuf;
 use dropshot::ConfigLogging;
 use serde::{Deserialize, Serialize};
-use std::path::Path;
-use std::path::PathBuf;
+use slog_error_chain::SlogInlineError;
 use thiserror::Error;

 /// Configuration for a gateway server
@@ -30,13 +31,11 @@ impl Config {
     /// Load a `Config` from the given TOML file
     ///
     /// This config object can then be used to create a new gateway server.
-    // The format is described in the README. // TODO add a README
-    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Config, LoadError> {
-        let path = path.as_ref();
+    pub fn from_file(path: &Utf8Path) -> Result<Config, LoadError> {
         let file_contents = std::fs::read_to_string(path)
-            .map_err(|e| (path.to_path_buf(), e))?;
+            .map_err(|err| LoadError::Io { path: path.into(), err })?;
         let config_parsed: Config = toml::from_str(&file_contents)
-            .map_err(|e| (path.to_path_buf(), e))?;
+            .map_err(|err| LoadError::Parse { path: path.into(), err })?;
         Ok(config_parsed)
     }
 }
@@ -46,32 +45,18 @@ pub struct PartialDropshotConfig {
     pub request_body_max_bytes: usize,
 }

-#[derive(Debug, Error)]
+#[derive(Debug, Error, SlogInlineError)]
 pub enum LoadError {
-    #[error("error reading \"{}\": {}", path.display(), err)]
-    Io { path: PathBuf, err: std::io::Error },
-    #[error("error parsing \"{}\": {}", path.display(), err)]
-    Parse { path: PathBuf, err: toml::de::Error },
-}
-
-impl From<(PathBuf, std::io::Error)> for LoadError {
-    fn from((path, err): (PathBuf, std::io::Error)) -> Self {
-        LoadError::Io { path, err }
-    }
-}
-
-impl From<(PathBuf, toml::de::Error)> for LoadError {
-    fn from((path, err): (PathBuf, toml::de::Error)) -> Self {
-        LoadError::Parse { path, err }
-    }
-}
-
-impl std::cmp::PartialEq<std::io::Error> for LoadError {
-    fn eq(&self, other: &std::io::Error) -> bool {
-        if let LoadError::Io { err, .. } = self {
-            err.kind() == other.kind()
-        } else {
-            false
-        }
-    }
+    #[error("error reading \"{path}\"")]
+    Io {
+        path: Utf8PathBuf,
+        #[source]
+        err: std::io::Error,
+    },
+    #[error("error parsing \"{path}\"")]
+    Parse {
+        path: Utf8PathBuf,
+        #[source]
+        err: toml::de::Error,
+    },
 }
diff --git a/gateway/src/error.rs b/gateway/src/error.rs
index 6daf9312ba..5933daa340 100644
--- a/gateway/src/error.rs
+++ b/gateway/src/error.rs
@@ -5,16 +5,17 @@
 //! Error handling facilities for the management gateway.
 use crate::management_switch::SpIdentifier;
-use anyhow::anyhow;
 use dropshot::HttpError;
 use gateway_messages::SpError;
 pub use gateway_sp_comms::error::CommunicationError;
 use gateway_sp_comms::error::UpdateError;
 use gateway_sp_comms::BindError;
+use slog_error_chain::InlineErrorChain;
+use slog_error_chain::SlogInlineError;
 use std::time::Duration;
 use thiserror::Error;

-#[derive(Debug, Error)]
+#[derive(Debug, Error, SlogInlineError)]
 pub enum StartupError {
     #[error("invalid configuration file: {}", .reasons.join(", "))]
     InvalidConfig { reasons: Vec<String> },
     #[error(transparent)]
     BindError(#[from] BindError),
 }

-#[derive(Debug, Error)]
+#[derive(Debug, Error, SlogInlineError)]
 pub enum SpCommsError {
     #[error("discovery process not yet complete")]
     DiscoveryNotYetComplete,
     #[error("location discovery failed: {reason}")]
     DiscoveryFailed { reason: String },
-    #[error("nonexistent SP (type {:?}, slot {})", .0.typ, .0.slot)]
+    #[error("nonexistent SP {0:?}")]
     SpDoesNotExist(SpIdentifier),
-    #[error(
-        "unknown socket address for SP (type {:?}, slot {})",
-        .0.typ,
-        .0.slot,
-    )]
+    #[error("unknown socket address for SP {0:?}")]
     SpAddressUnknown(SpIdentifier),
     #[error(
         "timeout ({timeout:?}) elapsed communicating with {sp:?} on port {port}"
     )]
     Timeout { timeout: Duration, port: usize, sp: Option<SpIdentifier> },
-    #[error("error communicating with SP: {0}")]
-    SpCommunicationFailed(#[from] CommunicationError),
-    #[error("updating SP failed: {0}")]
-    UpdateFailed(#[from] UpdateError),
+    #[error("error communicating with SP {sp:?}")]
+    SpCommunicationFailed {
+        sp: SpIdentifier,
+        #[source]
+        err: CommunicationError,
+    },
+    #[error("updating SP {sp:?} failed")]
+    UpdateFailed {
+        sp: SpIdentifier,
+        #[source]
+        err: UpdateError,
+    },
 }

 impl From<SpCommsError> for HttpError {
-    fn from(err: SpCommsError) -> Self {
-        match err {
+    fn from(error: SpCommsError) -> Self {
+        match error {
             SpCommsError::SpDoesNotExist(_) => HttpError::for_bad_request(
                 Some("InvalidSp".to_string()),
-                format!("{:#}", anyhow!(err)),
+                InlineErrorChain::new(&error).to_string(),
             ),
-            SpCommsError::SpCommunicationFailed(
-                CommunicationError::SpError(
-                    SpError::SerialConsoleAlreadyAttached,
-                ),
-            ) => HttpError::for_bad_request(
+            SpCommsError::SpCommunicationFailed {
+                err:
+                    CommunicationError::SpError(
+                        SpError::SerialConsoleAlreadyAttached,
+                    ),
+                ..
+            } => HttpError::for_bad_request(
                 Some("SerialConsoleAttached".to_string()),
-                format!("{:#}", anyhow!(err)),
+                InlineErrorChain::new(&error).to_string(),
             ),
-            SpCommsError::SpCommunicationFailed(
-                CommunicationError::SpError(SpError::RequestUnsupportedForSp),
-            ) => HttpError::for_bad_request(
+            SpCommsError::SpCommunicationFailed {
+                err:
+                    CommunicationError::SpError(SpError::RequestUnsupportedForSp),
+                ..
+            } => HttpError::for_bad_request(
                 Some("RequestUnsupportedForSp".to_string()),
-                format!("{:#}", anyhow!(err)),
+                InlineErrorChain::new(&error).to_string(),
             ),
-            SpCommsError::SpCommunicationFailed(
-                CommunicationError::SpError(
-                    SpError::RequestUnsupportedForComponent,
-                ),
-            ) => HttpError::for_bad_request(
+            SpCommsError::SpCommunicationFailed {
+                err:
+                    CommunicationError::SpError(
+                        SpError::RequestUnsupportedForComponent,
+                    ),
+                ..
+ } => HttpError::for_bad_request( Some("RequestUnsupportedForComponent".to_string()), - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::SpCommunicationFailed( - CommunicationError::SpError(SpError::InvalidSlotForComponent), - ) => HttpError::for_bad_request( + SpCommsError::SpCommunicationFailed { + err: + CommunicationError::SpError(SpError::InvalidSlotForComponent), + .. + } => HttpError::for_bad_request( Some("InvalidSlotForComponent".to_string()), - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::UpdateFailed(UpdateError::ImageTooLarge) => { - HttpError::for_bad_request( - Some("ImageTooLarge".to_string()), - format!("{:#}", anyhow!(err)), - ) - } - SpCommsError::UpdateFailed(UpdateError::Communication( - CommunicationError::SpError(SpError::UpdateSlotBusy), - )) => http_err_with_message( + SpCommsError::UpdateFailed { + err: UpdateError::ImageTooLarge, + .. + } => HttpError::for_bad_request( + Some("ImageTooLarge".to_string()), + InlineErrorChain::new(&error).to_string(), + ), + SpCommsError::UpdateFailed { + err: + UpdateError::Communication(CommunicationError::SpError( + SpError::UpdateSlotBusy, + )), + .. + } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "UpdateSlotBusy", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::UpdateFailed(UpdateError::Communication( - CommunicationError::SpError(SpError::UpdateInProgress { - .. - }), - )) => http_err_with_message( + SpCommsError::UpdateFailed { + err: + UpdateError::Communication(CommunicationError::SpError( + SpError::UpdateInProgress { .. }, + )), + .. + } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "UpdateInProgress", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::DiscoveryNotYetComplete => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "DiscoveryNotYetComplete", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::SpAddressUnknown(_) => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "SpAddressUnknown", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::DiscoveryFailed { .. } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "DiscoveryFailed ", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::Timeout { .. } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "Timeout ", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::SpCommunicationFailed(_) => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "SpCommunicationFailed", - format!("{:#}", anyhow!(err)), - ), - SpCommsError::UpdateFailed(_) => http_err_with_message( + SpCommsError::SpCommunicationFailed { .. } => { + http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "SpCommunicationFailed", + InlineErrorChain::new(&error).to_string(), + ) + } + SpCommsError::UpdateFailed { .. 
} => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "UpdateFailed", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), } } diff --git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index 2db6121f1d..e33e8dd4a6 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -566,10 +566,12 @@ async fn sp_get( path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp_id = path.into_inner().sp; - let sp = apictx.mgmt_switch.sp(sp_id.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; - let state = sp.state().await.map_err(SpCommsError::from)?; + let state = sp.state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(state.into())) } @@ -588,9 +590,12 @@ async fn sp_startup_options_get( ) -> Result, HttpError> { let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; - let sp = mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = mgmt_switch.sp(sp_id)?; - let options = sp.get_startup_options().await.map_err(SpCommsError::from)?; + let options = sp.get_startup_options().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(options.into())) } @@ -610,11 +615,12 @@ async fn sp_startup_options_set( ) -> Result { let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; - let sp = mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = mgmt_switch.sp(sp_id)?; - sp.set_startup_options(body.into_inner().into()) - .await - .map_err(SpCommsError::from)?; + sp.set_startup_options(body.into_inner().into()).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -632,8 +638,11 @@ async fn sp_component_list( path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; - let inventory = sp.inventory().await.map_err(SpCommsError::from)?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; + let inventory = sp.inventory().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(inventory.into())) } @@ -653,11 +662,13 @@ async fn sp_component_get( ) -> Result>, HttpError> { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - let details = - sp.component_details(component).await.map_err(SpCommsError::from)?; + let details = sp.component_details(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(details.entries.into_iter().map(Into::into).collect())) } @@ -690,7 +701,8 @@ async fn sp_component_caboose_get( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let ComponentCabooseSlot { firmware_slot } = query_params.into_inner(); let component = component_from_str(&component)?; @@ -714,19 +726,31 @@ async fn sp_component_caboose_get( CABOOSE_KEY_GIT_COMMIT, ) .await - .map_err(SpCommsError::from)?; + .map_err(|err| 
SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let board = sp .read_component_caboose(component, firmware_slot, CABOOSE_KEY_BOARD) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let name = sp .read_component_caboose(component, firmware_slot, CABOOSE_KEY_NAME) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let version = sp .read_component_caboose(component, firmware_slot, CABOOSE_KEY_VERSION) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let git_commit = from_utf8(&CABOOSE_KEY_GIT_COMMIT, git_commit)?; let board = from_utf8(&CABOOSE_KEY_BOARD, board)?; @@ -752,10 +776,13 @@ async fn sp_component_clear_status( ) -> Result { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - sp.component_clear_status(component).await.map_err(SpCommsError::from)?; + sp.component_clear_status(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -775,13 +802,13 @@ async fn sp_component_active_slot_get( ) -> Result, HttpError> { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - let slot = sp - .component_active_slot(component) - .await - .map_err(SpCommsError::from)?; + let slot = sp.component_active_slot(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(SpComponentFirmwareSlot { slot })) } @@ -809,14 +836,15 @@ async fn sp_component_active_slot_set( ) -> Result { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; let slot = body.into_inner().slot; let persist = query_params.into_inner().persist; - sp.set_component_active_slot(component, slot, persist) - .await - .map_err(SpCommsError::from)?; + sp.set_component_active_slot(component, slot, persist).await.map_err( + |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, + )?; Ok(HttpResponseUpdatedNoContent {}) } @@ -843,21 +871,27 @@ async fn sp_component_serial_console_attach( ) -> WebsocketEndpointResult { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); + let sp_id = sp.into(); let component = component_from_str(&component)?; // Ensure we can attach to this SP's serial console. let console = apictx .mgmt_switch - .sp(sp.into())? + .sp(sp_id)? .serial_console_attach(component) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let log = apictx.log.new(slog::o!("sp" => format!("{sp:?}"))); // We've successfully attached to the SP's serial console: upgrade the // websocket and run our side of that connection. 
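The same `map_err` shape now recurs at every call site in this file. As a sketch of an alternative (hypothetical, not part of this change), the mapping could live behind a small extension trait:

    // Hypothetical helper, shown only to illustrate the repeated pattern.
    trait WithSp<T> {
        fn with_sp(self, sp: SpIdentifier) -> Result<T, SpCommsError>;
    }

    impl<T> WithSp<T> for Result<T, CommunicationError> {
        fn with_sp(self, sp: SpIdentifier) -> Result<T, SpCommsError> {
            self.map_err(|err| SpCommsError::SpCommunicationFailed { sp, err })
        }
    }

    // e.g. `sp.inventory().await.with_sp(sp_id)?`

Keeping the explicit closures, as this change does, trades that indirection for call sites that name the error variant directly.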
- websocket.handle(move |conn| crate::serial_console::run(console, conn, log)) + websocket.handle(move |conn| { + crate::serial_console::run(sp_id, console, conn, log) + }) } /// Detach the websocket connection attached to the given SP component's serial @@ -875,9 +909,12 @@ async fn sp_component_serial_console_detach( // TODO-cleanup: "component" support for the serial console is half baked; // we don't use it at all to detach. let PathSpComponent { sp, component: _ } = path.into_inner(); + let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp.into())?; - sp.serial_console_detach().await.map_err(SpCommsError::from)?; + let sp = apictx.mgmt_switch.sp(sp_id)?; + sp.serial_console_detach().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -927,13 +964,17 @@ async fn sp_component_reset( ) -> Result { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; sp.reset_component_prepare(component) .and_then(|()| sp.reset_component_trigger(component)) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -964,7 +1005,8 @@ async fn sp_component_update( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; let ComponentUpdateIdSlot { id, firmware_slot } = query_params.into_inner(); @@ -973,7 +1015,7 @@ async fn sp_component_update( sp.start_update(component, id, firmware_slot, image) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::UpdateFailed { sp: sp_id, err })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -993,11 +1035,13 @@ async fn sp_component_update_status( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - let status = - sp.update_status(component).await.map_err(SpCommsError::from)?; + let status = sp.update_status(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(status.into())) } @@ -1020,11 +1064,14 @@ async fn sp_component_update_abort( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; let UpdateAbortBody { id } = body.into_inner(); - sp.update_abort(component, id).await.map_err(SpCommsError::from)?; + sp.update_abort(component, id).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -1043,6 +1090,7 @@ async fn sp_rot_cmpa_get( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); + let sp_id = sp.into(); // Ensure the caller knows they're asking for the RoT if component_from_str(&component)? 
!= SpComponent::ROT { @@ -1052,8 +1100,10 @@ async fn sp_rot_cmpa_get( )); } - let sp = apictx.mgmt_switch.sp(sp.into())?; - let data = sp.read_rot_cmpa().await.map_err(SpCommsError::from)?; + let sp = apictx.mgmt_switch.sp(sp_id)?; + let data = sp.read_rot_cmpa().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; let base64_data = base64::engine::general_purpose::STANDARD.encode(data); @@ -1076,6 +1126,7 @@ async fn sp_rot_cfpa_get( let PathSpComponent { sp, component } = path.into_inner(); let GetCfpaParams { slot } = params.into_inner(); + let sp_id = sp.into(); // Ensure the caller knows they're asking for the RoT if component_from_str(&component)? != SpComponent::ROT { @@ -1085,13 +1136,13 @@ async fn sp_rot_cfpa_get( )); } - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp = apictx.mgmt_switch.sp(sp_id)?; let data = match slot { RotCfpaSlot::Active => sp.read_rot_active_cfpa().await, RotCfpaSlot::Inactive => sp.read_rot_inactive_cfpa().await, RotCfpaSlot::Scratch => sp.read_rot_scratch_cfpa().await, } - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { sp: sp_id, err })?; let base64_data = base64::engine::general_purpose::STANDARD.encode(data); @@ -1141,16 +1192,19 @@ async fn ignition_get( let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; - let sp = path.into_inner().sp; - let ignition_target = mgmt_switch.ignition_target(sp.into())?; + let sp_id = path.into_inner().sp.into(); + let ignition_target = mgmt_switch.ignition_target(sp_id)?; let state = mgmt_switch .ignition_controller() .ignition_state(ignition_target) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; - let info = SpIgnitionInfo { id: sp, details: state.into() }; + let info = SpIgnitionInfo { id: sp_id.into(), details: state.into() }; Ok(HttpResponseOk(info)) } @@ -1173,13 +1227,17 @@ async fn ignition_command( let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; let PathSpIgnitionCommand { sp, command } = path.into_inner(); - let ignition_target = mgmt_switch.ignition_target(sp.into())?; + let sp_id = sp.into(); + let ignition_target = mgmt_switch.ignition_target(sp_id)?; mgmt_switch .ignition_controller() .ignition_command(ignition_target, command.into()) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -1197,9 +1255,12 @@ async fn sp_power_state_get( path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; - let power_state = sp.power_state().await.map_err(SpCommsError::from)?; + let power_state = sp.power_state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(power_state.into())) } @@ -1218,10 +1279,13 @@ async fn sp_power_state_set( body: TypedBody, ) -> Result { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let power_state = body.into_inner(); - sp.set_power_state(power_state.into()).await.map_err(SpCommsError::from)?; + sp.set_power_state(power_state.into()).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } 
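For a concrete sense of what clients now see (a sketch with invented values, assuming an `sp_id` in scope; the exact text depends on `SpIdentifier`'s `Debug` impl), the chain renders roughly as `error communicating with SP SpIdentifier { .. }: <source>` instead of the old `error communicating with SP: <source>` that never named the SP:

    let error = SpCommsError::SpCommunicationFailed {
        sp: sp_id,
        err: CommunicationError::SpError(SpError::RequestUnsupportedForSp),
    };
    assert!(InlineErrorChain::new(&error)
        .to_string()
        .starts_with("error communicating with SP"));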
@@ -1241,7 +1305,8 @@ async fn sp_installinator_image_id_set( use ipcc_key_value::Key; let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let image_id = ipcc_key_value::InstallinatorImageId::from(body.into_inner()); @@ -1251,7 +1316,7 @@ async fn sp_installinator_image_id_set( image_id.serialize(), ) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { sp: sp_id, err })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -1268,12 +1333,16 @@ async fn sp_installinator_image_id_delete( use ipcc_key_value::Key; let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; // We clear the image ID by setting it to a 0-length vec. sp.set_ipcc_key_lookup_value(Key::InstallinatorImageId as u8, Vec::new()) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; Ok(HttpResponseUpdatedNoContent {}) } diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index 10fcf1539c..5aa833f6e2 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -35,6 +35,7 @@ use slog::info; use slog::o; use slog::warn; use slog::Logger; +use slog_error_chain::InlineErrorChain; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::mem; @@ -138,7 +139,10 @@ impl Server { match gateway_sp_comms::register_probes() { Ok(_) => debug!(log, "successfully registered DTrace USDT probes"), Err(err) => { - warn!(log, "failed to register DTrace USDT probes: {}", err); + warn!( + log, "failed to register DTrace USDT probes"; + InlineErrorChain::new(&err), + ); } } @@ -328,9 +332,9 @@ pub async fn start_server( ); let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv)); if let slog_dtrace::ProbeRegistration::Failed(e) = registration { - let msg = format!("failed to register DTrace probes: {}", e); - error!(log, "{}", msg); - return Err(msg); + let err = InlineErrorChain::new(&e); + error!(log, "failed to register DTrace probes"; &err); + return Err(format!("failed to register DTrace probes: {err}")); } else { debug!(log, "registered DTrace probes"); } diff --git a/gateway/src/management_switch.rs b/gateway/src/management_switch.rs index 03fdda2cca..0571dc051e 100644 --- a/gateway/src/management_switch.rs +++ b/gateway/src/management_switch.rs @@ -383,7 +383,14 @@ impl ManagementSwitch { > { let controller = self.ignition_controller(); let location_map = self.location_map()?; - let bulk_state = controller.bulk_ignition_state().await?; + let bulk_state = + controller.bulk_ignition_state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { + sp: location_map + .port_to_id(self.local_ignition_controller_port), + err, + } + })?; Ok(bulk_state.into_iter().enumerate().filter_map(|(target, state)| { // If the SP returns an ignition target we don't have a port @@ -402,11 +409,8 @@ impl ManagementSwitch { None => { warn!( self.log, - concat!( - "ignoring unknown ignition target {}", - " returned by ignition controller SP" - ), - target, + "ignoring unknown ignition target {target} \ + returned by ignition controller SP", ); None } diff --git a/gateway/src/serial_console.rs b/gateway/src/serial_console.rs index 3e49f8526a..49aa807e55 100644 --- a/gateway/src/serial_console.rs +++ b/gateway/src/serial_console.rs @@ -5,6 +5,7 @@ // Copyright 2022 Oxide Computer Company 
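One detail worth noting from the `start_server` change above: `InlineErrorChain` implements both `Display` and slog's `KV`, so a single binding can feed the structured log line and the returned `String`. A minimal standalone sketch (the `report` function and its arguments are invented):

    use slog_error_chain::InlineErrorChain;

    fn report(log: &slog::Logger, e: &(dyn std::error::Error + 'static)) -> String {
        let err = InlineErrorChain::new(e);
        slog::error!(log, "operation failed"; &err); // structured KV
        format!("operation failed: {err}") // same text, inlined
    }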
use crate::error::SpCommsError; +use crate::SpIdentifier; use dropshot::WebsocketChannelResult; use dropshot::WebsocketConnection; use futures::stream::SplitSink; @@ -19,6 +20,7 @@ use slog::error; use slog::info; use slog::warn; use slog::Logger; +use slog_error_chain::SlogInlineError; use std::borrow::Cow; use std::ops::Deref; use std::ops::DerefMut; @@ -34,7 +36,7 @@ use tokio_tungstenite::tungstenite::protocol::WebSocketConfig; use tokio_tungstenite::tungstenite::Message; use tokio_tungstenite::WebSocketStream; -#[derive(Debug, thiserror::Error)] +#[derive(Debug, thiserror::Error, SlogInlineError)] enum SerialTaskError { #[error(transparent)] SpCommsError(#[from] SpCommsError), @@ -43,6 +45,7 @@ enum SerialTaskError { } pub(crate) async fn run( + sp: SpIdentifier, console: AttachedSerialConsole, conn: WebsocketConnection, log: Logger, @@ -80,7 +83,7 @@ pub(crate) async fn run( let (console_tx, mut console_rx) = console.split(); let console_tx = DetachOnDrop::new(console_tx); let mut ws_recv_handle = - tokio::spawn(ws_recv_task(ws_stream, console_tx, log.clone())); + tokio::spawn(ws_recv_task(sp, ws_stream, console_tx, log.clone())); loop { tokio::select! { @@ -112,7 +115,9 @@ pub(crate) async fn run( Ok(()) => (), Err(TrySendError::Full(data)) => { warn!( - log, "channel full; discarding serial console data from SP"; + log, + "channel full; discarding serial \ + console data from SP"; "length" => data.len(), ); } @@ -160,6 +165,7 @@ async fn ws_sink_task( } async fn ws_recv_task( + sp: SpIdentifier, mut ws_stream: SplitStream>, mut console_tx: DetachOnDrop, log: Logger, @@ -175,7 +181,7 @@ async fn ws_recv_task( console_tx .write(data) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { sp, err })?; keepalive.reset(); } Some(Ok(Message::Close(_))) | None => { @@ -194,7 +200,7 @@ async fn ws_recv_task( } _= keepalive.tick() => { - console_tx.keepalive().await.map_err(SpCommsError::from)?; + console_tx.keepalive().await.map_err(|err| SpCommsError::SpCommunicationFailed { sp, err })?; } } } From 3382a33887a42db409bf3b8c780cd4125cc35f51 Mon Sep 17 00:00:00 2001 From: Adam Leventhal Date: Tue, 19 Dec 2023 12:34:20 -0800 Subject: [PATCH 115/186] fix Name regex in json schema (#4718) --- common/src/api/external/mod.rs | 2 +- common/tests/output/pagination-schema.txt | 4 ++-- openapi/bootstrap-agent.json | 2 +- openapi/nexus-internal.json | 4 ++-- openapi/nexus.json | 4 ++-- openapi/sled-agent.json | 2 +- schema/all-zone-requests.json | 2 +- schema/all-zones-requests.json | 2 +- schema/rss-service-plan-v2.json | 2 +- schema/rss-sled-plan.json | 2 +- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index aa783ac9ca..446152137a 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -316,7 +316,7 @@ impl JsonSchema for Name { r#"^"#, // Cannot match a UUID r#"(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)"#, - r#"^[a-z][a-z0-9-]*[a-zA-Z0-9]*"#, + r#"^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?"#, r#"$"#, ) .to_string(), diff --git a/common/tests/output/pagination-schema.txt b/common/tests/output/pagination-schema.txt index 7cbaf439d6..436e614994 100644 --- a/common/tests/output/pagination-schema.txt +++ b/common/tests/output/pagination-schema.txt @@ -139,7 +139,7 @@ schema for pagination parameters: page selector, scan by name only "type": "string", "maxLength": 63, "minLength": 1, - "pattern": 
"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$" + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" }, "NameSortMode": { "description": "Supported set of sort modes for scanning by name only\n\nCurrently, we only support scanning in ascending order.", @@ -228,7 +228,7 @@ schema for pagination parameters: page selector, scan by name or id "type": "string", "maxLength": 63, "minLength": 1, - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$" + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" }, "NameOrId": { "oneOf": [ diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 0c5bd15050..2a7ff43202 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -491,7 +491,7 @@ "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", "type": "string", - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$", + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, "maxLength": 63 }, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index f909710ab4..a1d70d838b 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4268,7 +4268,7 @@ "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", "type": "string", - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$", + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, "maxLength": 63 }, @@ -5578,7 +5578,7 @@ "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", "type": "string", - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$", + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, "maxLength": 63 }, diff --git a/openapi/nexus.json b/openapi/nexus.json index 4c89706a1c..35586375e8 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -12812,7 +12812,7 @@ "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", "type": "string", - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$", + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, "maxLength": 63 }, @@ -15020,7 +15020,7 @@ "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", "type": "string", - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$", + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, "maxLength": 63 }, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index d71f8de644..6076df6dbb 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5246,7 +5246,7 @@ "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", "type": "string", - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$", + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, "maxLength": 63 }, diff --git a/schema/all-zone-requests.json b/schema/all-zone-requests.json index 4eb56d379d..8c324a15bd 100644 --- a/schema/all-zone-requests.json +++ b/schema/all-zone-requests.json @@ -210,7 +210,7 @@ "type": "string", "maxLength": 63, "minLength": 1, - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$" + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" }, "NetworkInterface": { "description": "Information required to construct a virtual network interface", diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 0e43e9ee21..7a07e2f9ae 100644 --- a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -94,7 +94,7 @@ "type": "string", "maxLength": 63, "minLength": 1, - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$" + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" }, "NetworkInterface": { "description": "Information required to construct a virtual network interface", diff --git a/schema/rss-service-plan-v2.json b/schema/rss-service-plan-v2.json index 0bcd27b9cc..62ce358938 100644 --- a/schema/rss-service-plan-v2.json +++ b/schema/rss-service-plan-v2.json @@ -179,7 +179,7 @@ "type": "string", "maxLength": 63, "minLength": 1, - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$" + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" }, "NetworkInterface": { 
"description": "Information required to construct a virtual network interface", diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 2ef7a7b58a..0396ccc685 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -355,7 +355,7 @@ "type": "string", "maxLength": 63, "minLength": 1, - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z][a-z0-9-]*[a-zA-Z0-9]*$" + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" }, "NewPasswordHash": { "title": "A password hash in PHC string format", From 6783a5af9361a41840959fbb614d4bbd064b4f45 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 19 Dec 2023 18:32:26 -0500 Subject: [PATCH 116/186] Fix fake pagination for sled_list_uninitialized (#4720) --- nexus/src/external_api/http_entrypoints.rs | 8 +++++++ openapi/nexus.json | 25 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 8a4aeaeff5..3e38558760 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -4659,8 +4659,16 @@ async fn rack_view( }] async fn sled_list_uninitialized( rqctx: RequestContext>, + query: Query>, ) -> Result>, HttpError> { let apictx = rqctx.context(); + // We don't actually support real pagination + let pag_params = query.into_inner(); + if let dropshot::WhichPage::Next(last_seen) = &pag_params.page { + return Err( + Error::invalid_value(last_seen.clone(), "bad page token").into() + ); + } let handler = async { let nexus = &apictx.nexus; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; diff --git a/openapi/nexus.json b/openapi/nexus.json index 35586375e8..4131460149 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -4022,6 +4022,28 @@ ], "summary": "List uninitialized sleds in a given rack", "operationId": "sled_list_uninitialized", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + } + ], "responses": { "200": { "description": "successful operation", @@ -4039,6 +4061,9 @@ "5XX": { "$ref": "#/components/responses/Error" } + }, + "x-dropshot-pagination": { + "required": [] } } }, From f2fb5af6e3c86fc231768f6faf58281b912a33e4 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 19 Dec 2023 22:07:16 -0500 Subject: [PATCH 117/186] Deserialize pre-6.0.0 RegionSnapshot objects (#4721) Schema update 6.0.0 added the `deleted` column to the region_snapshot table and added the `deleted` field to the RegionSnapshot object. If an old RegionSnapshot was serialized before this schema update (as part of a volume delete) into the `resources_to_clean_up` column of the volume table, _and_ if that volume delete failed and unwound, Nexus will fail to deserialize that column after that schema update + model change if there is another request to delete that volume. Add `#[serde(default)]` to RegionSnapshot's deleting field so that Nexus can deserialize pre-6.0.0 RegionSnapshot objects. 
This will default to `false`, which matches the default that the ALTER
COLUMN set in the 6.0.0 schema upgrade.

Fixes oxidecomputer/customer-support#72
---
 nexus/db-model/src/region_snapshot.rs       |  10 +-
 nexus/db-queries/src/db/datastore/volume.rs | 115 ++++++++++++++++++++
 2 files changed, 122 insertions(+), 3 deletions(-)

diff --git a/nexus/db-model/src/region_snapshot.rs b/nexus/db-model/src/region_snapshot.rs
index af1cf8b2b3..2ea59f99f0 100644
--- a/nexus/db-model/src/region_snapshot.rs
+++ b/nexus/db-model/src/region_snapshot.rs
@@ -27,12 +27,16 @@ pub struct RegionSnapshot {
     pub region_id: Uuid,
     pub snapshot_id: Uuid,
 
-    // used for identifying volumes that reference this
+    /// used for identifying volumes that reference this
    pub snapshot_addr: String,
 
-    // how many volumes reference this?
+    /// how many volumes reference this?
    pub volume_references: i64,
 
-    // true if part of a volume's `resources_to_clean_up` already
+    /// true if part of a volume's `resources_to_clean_up` already
+    // this column was added in `schema/crdb/6.0.0/up1.sql` with a default of
+    // false, so instruct serde to deserialize default as false if an old
+    // serialized version of RegionSnapshot is being deserialized.
+    #[serde(default)]
    pub deleting: bool,
 }
 
diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs
index 4f31efd610..d0b093ff45 100644
--- a/nexus/db-queries/src/db/datastore/volume.rs
+++ b/nexus/db-queries/src/db/datastore/volume.rs
@@ -1059,3 +1059,118 @@ pub fn read_only_resources_associated_with_volume(
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use crate::db::datastore::datastore_test;
+    use nexus_test_utils::db::test_setup_database;
+    use omicron_test_utils::dev;
+
+    // Assert that Nexus will not fail to deserialize an old version of
+    // CrucibleResources that was serialized before schema update 6.0.0.
+    #[tokio::test]
+    async fn test_deserialize_old_crucible_resources() {
+        let logctx =
+            dev::test_setup_log("test_deserialize_old_crucible_resources");
+        let log = logctx.log.new(o!());
+        let mut db = test_setup_database(&log).await;
+        let (_opctx, db_datastore) = datastore_test(&logctx, &db).await;
+
+        // Start with a fake volume, doesn't matter if it's empty
+
+        let volume_id = Uuid::new_v4();
+        let _volume = db_datastore
+            .volume_create(nexus_db_model::Volume::new(
+                volume_id,
+                serde_json::to_string(&VolumeConstructionRequest::Volume {
+                    id: volume_id,
+                    block_size: 512,
+                    sub_volumes: vec![],
+                    read_only_parent: None,
+                })
+                .unwrap(),
+            ))
+            .await
+            .unwrap();
+
+        // Add old CrucibleResources json in the `resources_to_clean_up` column -
+        // this was before the `deleting` column / field was added to
+        // RegionSnapshot.
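As an aside, the serde mechanics this test exercises reduce to a small sketch (the `Snap` struct and its field subset are invented for illustration):

    #[derive(serde::Deserialize)]
    struct Snap {
        volume_references: i64,
        #[serde(default)] // the fix in this change
        deleting: bool,
    }

    // Pre-6.0.0 JSON with no "deleting" key now deserializes, with
    // `deleting` falling back to `bool::default()`, i.e. false:
    let s: Snap =
        serde_json::from_str(r#"{"volume_references": 0}"#).unwrap();
    assert!(!s.deleting);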
+ + { + use db::schema::volume::dsl; + + let conn = + db_datastore.pool_connection_unauthorized().await.unwrap(); + + let resources_to_clean_up = r#"{ + "V1": { + "datasets_and_regions": [], + "datasets_and_snapshots": [ + [ + { + "identity": { + "id": "844ee8d5-7641-4b04-bca8-7521e258028a", + "time_created": "2023-12-19T21:38:34.000000Z", + "time_modified": "2023-12-19T21:38:34.000000Z" + }, + "time_deleted": null, + "rcgen": 1, + "pool_id": "81a98506-4a97-4d92-8de5-c21f6fc71649", + "ip": "fd00:1122:3344:101::1", + "port": 32345, + "kind": "Crucible", + "size_used": 10737418240 + }, + { + "dataset_id": "b69edd77-1b3e-4f11-978c-194a0a0137d0", + "region_id": "8d668bf9-68cc-4387-8bc0-b4de7ef9744f", + "snapshot_id": "f548332c-6026-4eff-8c1c-ba202cd5c834", + "snapshot_addr": "[fd00:1122:3344:101::2]:19001", + "volume_references": 0 + } + ] + ] + } +} +"#; + + diesel::update(dsl::volume) + .filter(dsl::id.eq(volume_id)) + .set(dsl::resources_to_clean_up.eq(resources_to_clean_up)) + .execute_async(&*conn) + .await + .unwrap(); + } + + // Soft delete the volume, which runs the CTE + + let cr = db_datastore + .decrease_crucible_resource_count_and_soft_delete_volume(volume_id) + .await + .unwrap(); + + // Assert the contents of the returned CrucibleResources + + let datasets_and_regions = + db_datastore.regions_to_delete(&cr).await.unwrap(); + let datasets_and_snapshots = + db_datastore.snapshots_to_delete(&cr).await.unwrap(); + + assert!(datasets_and_regions.is_empty()); + assert_eq!(datasets_and_snapshots.len(), 1); + + let region_snapshot = &datasets_and_snapshots[0].1; + + assert_eq!( + region_snapshot.snapshot_id, + "f548332c-6026-4eff-8c1c-ba202cd5c834".parse().unwrap() + ); + assert_eq!(region_snapshot.deleting, false); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} From 94944ccf5349479c9f0d1235fe504f2570253474 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 19 Dec 2023 22:56:26 -0800 Subject: [PATCH 118/186] Remove lazy_static in favor of once_cell (#4699) Fixes https://github.com/oxidecomputer/omicron/issues/4697 --- Cargo.lock | 7 +- Cargo.toml | 1 - common/Cargo.toml | 2 +- common/src/address.rs | 108 +- nexus/Cargo.toml | 1 - nexus/db-queries/Cargo.toml | 2 +- nexus/db-queries/src/authn/external/spoof.rs | 29 +- nexus/db-queries/src/authz/api_resources.rs | 7 +- .../db-queries/src/db/datastore/silo_user.rs | 10 +- nexus/db-queries/src/db/fixed_data/mod.rs | 15 +- nexus/db-queries/src/db/fixed_data/project.rs | 20 +- .../src/db/fixed_data/role_assignment.rs | 13 +- .../src/db/fixed_data/role_builtin.rs | 34 +- nexus/db-queries/src/db/fixed_data/silo.rs | 104 +- .../db-queries/src/db/fixed_data/silo_user.rs | 54 +- .../src/db/fixed_data/user_builtin.rs | 106 +- nexus/db-queries/src/db/fixed_data/vpc.rs | 42 +- .../src/db/fixed_data/vpc_firewall_rule.rs | 73 +- .../src/db/fixed_data/vpc_subnet.rs | 53 +- .../src/db/queries/network_interface.rs | 60 +- nexus/db-queries/src/db/saga_recovery.rs | 12 +- nexus/defaults/Cargo.toml | 2 +- nexus/defaults/src/lib.rs | 88 +- nexus/src/external_api/console_api.rs | 12 +- nexus/tests/integration_tests/endpoints.rs | 1256 +++++++++-------- nexus/tests/integration_tests/unauthorized.rs | 32 +- 26 files changed, 1155 insertions(+), 988 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 74c01d3411..962fe68e02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4164,7 +4164,6 @@ dependencies = [ "internal-dns", "ipnetwork", "itertools 0.12.0", - "lazy_static", "macaddr", "newtype_derive", "nexus-db-model", @@ -4177,6 +4176,7 @@ 
dependencies = [ "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", + "once_cell", "openapiv3", "openssl", "oso", @@ -4213,9 +4213,9 @@ name = "nexus-defaults" version = "0.1.0" dependencies = [ "ipnetwork", - "lazy_static", "omicron-common", "omicron-workspace-hack", + "once_cell", "rand 0.8.5", "serde_json", ] @@ -4618,10 +4618,10 @@ dependencies = [ "hex", "http", "ipnetwork", - "lazy_static", "libc", "macaddr", "omicron-workspace-hack", + "once_cell", "parse-display", "progenitor", "proptest", @@ -4773,7 +4773,6 @@ dependencies = [ "internal-dns", "ipnetwork", "itertools 0.12.0", - "lazy_static", "macaddr", "mg-admin-client", "mime_guess", diff --git a/Cargo.toml b/Cargo.toml index ca134536f5..d651a13bf1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -229,7 +229,6 @@ ipnetwork = { version = "0.20", features = ["schemars"] } itertools = "0.12.0" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" -lazy_static = "1.4.0" libc = "0.2.151" linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } diff --git a/common/Cargo.toml b/common/Cargo.toml index 49997e619c..3941f5303e 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -17,7 +17,6 @@ hex.workspace = true http.workspace = true ipnetwork.workspace = true macaddr.workspace = true -lazy_static.workspace = true proptest = { workspace = true, optional = true } rand.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } @@ -38,6 +37,7 @@ uuid.workspace = true parse-display.workspace = true progenitor.workspace = true omicron-workspace-hack.workspace = true +once_cell.workspace = true [dev-dependencies] camino-tempfile.workspace = true diff --git a/common/src/address.rs b/common/src/address.rs index 992e8f0406..94361a2705 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -9,6 +9,7 @@ use crate::api::external::{self, Error, Ipv4Net, Ipv6Net}; use ipnetwork::{Ipv4Network, Ipv6Network}; +use once_cell::sync::Lazy; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; @@ -76,65 +77,78 @@ pub const NTP_PORT: u16 = 123; // that situation (which may be as soon as allocating ephemeral IPs). pub const NUM_SOURCE_NAT_PORTS: u16 = 1 << 14; -lazy_static::lazy_static! { - // Services that require external connectivity are given an OPTE port - // with a "Service VNIC" record. Like a "Guest VNIC", a service is - // placed within a VPC (a built-in services VPC), along with a VPC subnet. - // But unlike guest instances which are created at runtime by Nexus, these - // services are created by RSS early on. So, we have some fixed values - // used to bootstrap service OPTE ports. Each service kind uses a distinct - // VPC subnet which RSS will allocate addresses from for those services. - // The specific values aren't deployment-specific as they are virtualized - // within OPTE. - - /// The IPv6 prefix assigned to the built-in services VPC. - // The specific prefix here was randomly chosen from the expected VPC - // prefix range (`fd00::/48`). See `random_vpc_ipv6_prefix`. - // Furthermore, all the below *_OPTE_IPV6_SUBNET constants are - // /64's within this prefix. - pub static ref SERVICE_VPC_IPV6_PREFIX: Ipv6Net = Ipv6Net( +// Services that require external connectivity are given an OPTE port +// with a "Service VNIC" record. Like a "Guest VNIC", a service is +// placed within a VPC (a built-in services VPC), along with a VPC subnet. 
+// But unlike guest instances which are created at runtime by Nexus, these +// services are created by RSS early on. So, we have some fixed values +// used to bootstrap service OPTE ports. Each service kind uses a distinct +// VPC subnet which RSS will allocate addresses from for those services. +// The specific values aren't deployment-specific as they are virtualized +// within OPTE. + +/// The IPv6 prefix assigned to the built-in services VPC. +// The specific prefix here was randomly chosen from the expected VPC +// prefix range (`fd00::/48`). See `random_vpc_ipv6_prefix`. +// Furthermore, all the below *_OPTE_IPV6_SUBNET constants are +// /64's within this prefix. +pub static SERVICE_VPC_IPV6_PREFIX: Lazy = Lazy::new(|| { + Ipv6Net( Ipv6Network::new( Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 0, 0, 0, 0, 0), Ipv6Net::VPC_IPV6_PREFIX_LENGTH, - ).unwrap(), - ); - - /// The IPv4 subnet for External DNS OPTE ports. - pub static ref DNS_OPTE_IPV4_SUBNET: Ipv4Net = - Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 1, 0), 24).unwrap()); - - /// The IPv6 subnet for External DNS OPTE ports. - pub static ref DNS_OPTE_IPV6_SUBNET: Ipv6Net = Ipv6Net( + ) + .unwrap(), + ) +}); + +/// The IPv4 subnet for External DNS OPTE ports. +pub static DNS_OPTE_IPV4_SUBNET: Lazy = Lazy::new(|| { + Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 1, 0), 24).unwrap()) +}); + +/// The IPv6 subnet for External DNS OPTE ports. +pub static DNS_OPTE_IPV6_SUBNET: Lazy = Lazy::new(|| { + Ipv6Net( Ipv6Network::new( Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 1, 0, 0, 0, 0), Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH, - ).unwrap(), - ); - - /// The IPv4 subnet for Nexus OPTE ports. - pub static ref NEXUS_OPTE_IPV4_SUBNET: Ipv4Net = - Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 2, 0), 24).unwrap()); - - /// The IPv6 subnet for Nexus OPTE ports. - pub static ref NEXUS_OPTE_IPV6_SUBNET: Ipv6Net = Ipv6Net( + ) + .unwrap(), + ) +}); + +/// The IPv4 subnet for Nexus OPTE ports. +pub static NEXUS_OPTE_IPV4_SUBNET: Lazy = Lazy::new(|| { + Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 2, 0), 24).unwrap()) +}); + +/// The IPv6 subnet for Nexus OPTE ports. +pub static NEXUS_OPTE_IPV6_SUBNET: Lazy = Lazy::new(|| { + Ipv6Net( Ipv6Network::new( Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 2, 0, 0, 0, 0), Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH, - ).unwrap(), - ); - - /// The IPv4 subnet for Boundary NTP OPTE ports. - pub static ref NTP_OPTE_IPV4_SUBNET: Ipv4Net = - Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 3, 0), 24).unwrap()); - - /// The IPv6 subnet for Boundary NTP OPTE ports. - pub static ref NTP_OPTE_IPV6_SUBNET: Ipv6Net = Ipv6Net( + ) + .unwrap(), + ) +}); + +/// The IPv4 subnet for Boundary NTP OPTE ports. +pub static NTP_OPTE_IPV4_SUBNET: Lazy = Lazy::new(|| { + Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 3, 0), 24).unwrap()) +}); + +/// The IPv6 subnet for Boundary NTP OPTE ports. +pub static NTP_OPTE_IPV6_SUBNET: Lazy = Lazy::new(|| { + Ipv6Net( Ipv6Network::new( Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 3, 0, 0, 0, 0), Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH, - ).unwrap(), - ); -} + ) + .unwrap(), + ) +}); // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. 
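The translation applied throughout this patch is mechanical; schematically, with an invented `EXAMPLE` static:

    // Before:
    lazy_static::lazy_static! {
        static ref EXAMPLE: Ipv4Net =
            Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 0, 0), 24).unwrap());
    }

    // After: `Lazy<T>` is an ordinary static whose initializer closure runs
    // on first dereference, so existing `*EXAMPLE` and autoderef call sites
    // are unchanged.
    static EXAMPLE: Lazy<Ipv4Net> = Lazy::new(|| {
        Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 0, 0), 24).unwrap())
    });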
diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 704a7ab7bd..25833ec104 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -32,7 +32,6 @@ http.workspace = true hyper.workspace = true internal-dns.workspace = true ipnetwork.workspace = true -lazy_static.workspace = true macaddr.workspace = true mime_guess.workspace = true # Not under "dev-dependencies"; these also need to be implemented for diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 9d8afd1fea..d5320be733 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -24,9 +24,9 @@ headers.workspace = true http.workspace = true hyper.workspace = true ipnetwork.workspace = true -lazy_static.workspace = true macaddr.workspace = true newtype_derive.workspace = true +once_cell.workspace = true openssl.workspace = true oso.workspace = true paste.workspace = true diff --git a/nexus/db-queries/src/authn/external/spoof.rs b/nexus/db-queries/src/authn/external/spoof.rs index 0b5896a6f8..9b5ed94bde 100644 --- a/nexus/db-queries/src/authn/external/spoof.rs +++ b/nexus/db-queries/src/authn/external/spoof.rs @@ -16,7 +16,7 @@ use anyhow::Context; use async_trait::async_trait; use headers::authorization::{Authorization, Bearer}; use headers::HeaderMapExt; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use uuid::Uuid; // This scheme is intended for demos, development, and testing until we have a @@ -54,18 +54,21 @@ const SPOOF_RESERVED_BAD_CREDS: &str = "this-fake-ID-it-is-truly-excellent"; // subsets of the base64 character set, so we do not bother encoding them. const SPOOF_PREFIX: &str = "oxide-spoof-"; -lazy_static! { - /// Actor (id) used for the special "bad credentials" error - static ref SPOOF_RESERVED_BAD_CREDS_ACTOR: Actor = Actor::UserBuiltin { - user_builtin_id: "22222222-2222-2222-2222-222222222222".parse().unwrap(), - }; - /// Complete HTTP header value to trigger the "bad actor" error - pub static ref SPOOF_HEADER_BAD_ACTOR: Authorization = - make_header_value_str(SPOOF_RESERVED_BAD_ACTOR).unwrap(); - /// Complete HTTP header value to trigger the "bad creds" error - pub static ref SPOOF_HEADER_BAD_CREDS: Authorization = - make_header_value_str(SPOOF_RESERVED_BAD_CREDS).unwrap(); -} +/// Actor (id) used for the special "bad credentials" error +static SPOOF_RESERVED_BAD_CREDS_ACTOR: Lazy = + Lazy::new(|| Actor::UserBuiltin { + user_builtin_id: "22222222-2222-2222-2222-222222222222" + .parse() + .unwrap(), + }); + +/// Complete HTTP header value to trigger the "bad actor" error +pub static SPOOF_HEADER_BAD_ACTOR: Lazy> = + Lazy::new(|| make_header_value_str(SPOOF_RESERVED_BAD_ACTOR).unwrap()); + +/// Complete HTTP header value to trigger the "bad creds" error +pub static SPOOF_HEADER_BAD_CREDS: Lazy> = + Lazy::new(|| make_header_value_str(SPOOF_RESERVED_BAD_CREDS).unwrap()); /// Implements a (test-only) authentication scheme where the client simply /// provides the actor information in a custom bearer token and we always trust diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index 2dfe2f7174..8485b8f11f 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -42,9 +42,9 @@ use crate::db::DataStore; use authz_macros::authz_resource; use futures::future::BoxFuture; use futures::FutureExt; -use lazy_static::lazy_static; use nexus_types::external_api::shared::{FleetRole, ProjectRole, SiloRole}; use omicron_common::api::external::{Error, LookupType, ResourceType}; +use 
once_cell::sync::Lazy; use oso::PolarClass; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -169,9 +169,8 @@ pub struct Fleet; /// Singleton representing the [`Fleet`] itself for authz purposes pub const FLEET: Fleet = Fleet; -lazy_static! { - pub static ref FLEET_LOOKUP: LookupType = LookupType::ById(*FLEET_ID); -} +pub static FLEET_LOOKUP: Lazy = + Lazy::new(|| LookupType::ById(*FLEET_ID)); impl Eq for Fleet {} impl PartialEq for Fleet { diff --git a/nexus/db-queries/src/db/datastore/silo_user.rs b/nexus/db-queries/src/db/datastore/silo_user.rs index 6084f8c2ab..59cb19a609 100644 --- a/nexus/db-queries/src/db/datastore/silo_user.rs +++ b/nexus/db-queries/src/db/datastore/silo_user.rs @@ -363,11 +363,11 @@ impl DataStore { let builtin_users = [ // Note: "db_init" is also a builtin user, but that one by necessity // is created with the database. - &*authn::USER_SERVICE_BALANCER, - &*authn::USER_INTERNAL_API, - &*authn::USER_INTERNAL_READ, - &*authn::USER_EXTERNAL_AUTHN, - &*authn::USER_SAGA_RECOVERY, + &authn::USER_SERVICE_BALANCER, + &authn::USER_INTERNAL_API, + &authn::USER_INTERNAL_READ, + &authn::USER_EXTERNAL_AUTHN, + &authn::USER_SAGA_RECOVERY, ] .iter() .map(|u| { diff --git a/nexus/db-queries/src/db/fixed_data/mod.rs b/nexus/db-queries/src/db/fixed_data/mod.rs index 5c91407134..4f896eb5d1 100644 --- a/nexus/db-queries/src/db/fixed_data/mod.rs +++ b/nexus/db-queries/src/db/fixed_data/mod.rs @@ -31,7 +31,7 @@ // 001de000-074c built-in services vpc // 001de000-c470 built-in services vpc subnets -use lazy_static::lazy_static; +use once_cell::sync::Lazy; pub mod project; pub mod role_assignment; @@ -43,13 +43,12 @@ pub mod vpc; pub mod vpc_firewall_rule; pub mod vpc_subnet; -lazy_static! { - /* See above for where this uuid comes from. */ - pub static ref FLEET_ID: uuid::Uuid = - "001de000-1334-4000-8000-000000000000" - .parse() - .expect("invalid uuid for builtin fleet id"); -} +/* See above for where this uuid comes from. */ +pub static FLEET_ID: Lazy = Lazy::new(|| { + "001de000-1334-4000-8000-000000000000" + .parse() + .expect("invalid uuid for builtin fleet id") +}); #[cfg(test)] fn assert_valid_uuid(id: &uuid::Uuid) { diff --git a/nexus/db-queries/src/db/fixed_data/project.rs b/nexus/db-queries/src/db/fixed_data/project.rs index 52450438c0..e240900e0c 100644 --- a/nexus/db-queries/src/db/fixed_data/project.rs +++ b/nexus/db-queries/src/db/fixed_data/project.rs @@ -4,18 +4,20 @@ use crate::db; use crate::db::datastore::SERVICES_DB_NAME; -use lazy_static::lazy_static; use nexus_types::external_api::params; use omicron_common::api::external::IdentityMetadataCreateParams; +use once_cell::sync::Lazy; -lazy_static! { - /// UUID of built-in project for internal services on the rack. - pub static ref SERVICES_PROJECT_ID: uuid::Uuid = "001de000-4401-4000-8000-000000000000" +/// UUID of built-in project for internal services on the rack. +pub static SERVICES_PROJECT_ID: Lazy = Lazy::new(|| { + "001de000-4401-4000-8000-000000000000" .parse() - .expect("invalid uuid for builtin services project id"); + .expect("invalid uuid for builtin services project id") +}); - /// Built-in Project for internal services on the rack. - pub static ref SERVICES_PROJECT: db::model::Project = db::model::Project::new_with_id( +/// Built-in Project for internal services on the rack. +pub static SERVICES_PROJECT: Lazy = Lazy::new(|| { + db::model::Project::new_with_id( *SERVICES_PROJECT_ID, *super::silo::INTERNAL_SILO_ID, params::ProjectCreate { @@ -24,5 +26,5 @@ lazy_static! 
{ description: "Built-in project for Oxide Services".to_string(), }, }, - ); -} + ) +}); diff --git a/nexus/db-queries/src/db/fixed_data/role_assignment.rs b/nexus/db-queries/src/db/fixed_data/role_assignment.rs index 7d7ddffab6..d6c95d47b6 100644 --- a/nexus/db-queries/src/db/fixed_data/role_assignment.rs +++ b/nexus/db-queries/src/db/fixed_data/role_assignment.rs @@ -8,10 +8,10 @@ use super::user_builtin; use super::FLEET_ID; use crate::db::model::IdentityType; use crate::db::model::RoleAssignment; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; -lazy_static! { - pub static ref BUILTIN_ROLE_ASSIGNMENTS: Vec = +pub static BUILTIN_ROLE_ASSIGNMENTS: Lazy> = + Lazy::new(|| { vec![ // The "internal-api" user gets the "admin" role on the sole Fleet. // This is a pretty elevated privilege. @@ -24,7 +24,6 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), - // The "USER_SERVICE_BALANCER" user gets the "admin" role on the // Fleet. // @@ -38,7 +37,6 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), - // The "internal-read" user gets the "viewer" role on the sole // Fleet. This will grant them the ability to read various control // plane data (like the list of sleds), which is in turn used to @@ -50,7 +48,6 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_VIEWER.role_name, ), - // The "external-authenticator" user gets the "authenticator" role // on the sole fleet. This grants them the ability to create // sessions. @@ -61,5 +58,5 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_AUTHENTICATOR.role_name, ), - ]; -} + ] + }); diff --git a/nexus/db-queries/src/db/fixed_data/role_builtin.rs b/nexus/db-queries/src/db/fixed_data/role_builtin.rs index 865f6328f4..f58077fc3f 100644 --- a/nexus/db-queries/src/db/fixed_data/role_builtin.rs +++ b/nexus/db-queries/src/db/fixed_data/role_builtin.rs @@ -3,8 +3,8 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Built-in roles -use lazy_static::lazy_static; use omicron_common::api; +use once_cell::sync::Lazy; #[derive(Clone, Debug)] pub struct RoleBuiltinConfig { @@ -13,28 +13,36 @@ pub struct RoleBuiltinConfig { pub description: &'static str, } -lazy_static! { - pub static ref FLEET_ADMIN: RoleBuiltinConfig = RoleBuiltinConfig { +pub static FLEET_ADMIN: Lazy = + Lazy::new(|| RoleBuiltinConfig { resource_type: api::external::ResourceType::Fleet, role_name: "admin", description: "Fleet Administrator", - }; - pub static ref FLEET_AUTHENTICATOR: RoleBuiltinConfig = RoleBuiltinConfig { + }); + +pub static FLEET_AUTHENTICATOR: Lazy = + Lazy::new(|| RoleBuiltinConfig { resource_type: api::external::ResourceType::Fleet, role_name: "external-authenticator", description: "Fleet External Authenticator", - }; - pub static ref FLEET_VIEWER: RoleBuiltinConfig = RoleBuiltinConfig { + }); + +pub static FLEET_VIEWER: Lazy = + Lazy::new(|| RoleBuiltinConfig { resource_type: api::external::ResourceType::Fleet, role_name: "viewer", description: "Fleet Viewer", - }; - pub static ref SILO_ADMIN: RoleBuiltinConfig = RoleBuiltinConfig { + }); + +pub static SILO_ADMIN: Lazy = + Lazy::new(|| RoleBuiltinConfig { resource_type: api::external::ResourceType::Silo, role_name: "admin", description: "Silo Administrator", - }; - pub static ref BUILTIN_ROLES: Vec = vec![ + }); + +pub static BUILTIN_ROLES: Lazy> = Lazy::new(|| { + vec![ FLEET_ADMIN.clone(), FLEET_AUTHENTICATOR.clone(), FLEET_VIEWER.clone(), @@ -69,8 +77,8 @@ lazy_static! 
{ role_name: "viewer", description: "Project Viewer", }, - ]; -} + ] +}); #[cfg(test)] mod test { diff --git a/nexus/db-queries/src/db/fixed_data/silo.rs b/nexus/db-queries/src/db/fixed_data/silo.rs index 6eba849ee3..62bcc61c1e 100644 --- a/nexus/db-queries/src/db/fixed_data/silo.rs +++ b/nexus/db-queries/src/db/fixed_data/silo.rs @@ -3,62 +3,66 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use crate::db; -use lazy_static::lazy_static; use nexus_types::external_api::{params, shared}; use omicron_common::api::external::IdentityMetadataCreateParams; +use once_cell::sync::Lazy; -lazy_static! { - pub static ref SILO_ID: uuid::Uuid = "001de000-5110-4000-8000-000000000000" +pub static SILO_ID: Lazy = Lazy::new(|| { + "001de000-5110-4000-8000-000000000000" .parse() - .expect("invalid uuid for builtin silo id"); + .expect("invalid uuid for builtin silo id") +}); - /// "Default" Silo - /// - /// This was historically used for demos and the unit tests. The plan is to - /// remove it per omicron#2305. - pub static ref DEFAULT_SILO: db::model::Silo = - db::model::Silo::new_with_id( - *SILO_ID, - params::SiloCreate { - identity: IdentityMetadataCreateParams { - name: "default-silo".parse().unwrap(), - description: "default silo".to_string(), - }, - // This quota is actually _unused_ because the default silo - // isn't constructed in the same way a normal silo would be. - quotas: params::SiloQuotasCreate::empty(), - discoverable: false, - identity_mode: shared::SiloIdentityMode::LocalOnly, - admin_group_name: None, - tls_certificates: vec![], - mapped_fleet_roles: Default::default(), +/// "Default" Silo +/// +/// This was historically used for demos and the unit tests. The plan is to +/// remove it per omicron#2305. +pub static DEFAULT_SILO: Lazy = Lazy::new(|| { + db::model::Silo::new_with_id( + *SILO_ID, + params::SiloCreate { + identity: IdentityMetadataCreateParams { + name: "default-silo".parse().unwrap(), + description: "default silo".to_string(), }, - ) - .unwrap(); + // This quota is actually _unused_ because the default silo + // isn't constructed in the same way a normal silo would be. + quotas: params::SiloQuotasCreate::empty(), + discoverable: false, + identity_mode: shared::SiloIdentityMode::LocalOnly, + admin_group_name: None, + tls_certificates: vec![], + mapped_fleet_roles: Default::default(), + }, + ) + .unwrap() +}); - /// UUID of built-in internal silo. - pub static ref INTERNAL_SILO_ID: uuid::Uuid = - "001de000-5110-4000-8000-000000000001" - .parse() - .expect("invalid uuid for builtin silo id"); +/// UUID of built-in internal silo. +pub static INTERNAL_SILO_ID: Lazy = Lazy::new(|| { + "001de000-5110-4000-8000-000000000001" + .parse() + .expect("invalid uuid for builtin silo id") +}); - /// Built-in Silo to house internal resources. It contains no users and - /// can't be logged into. - pub static ref INTERNAL_SILO: db::model::Silo = - db::model::Silo::new_with_id( - *INTERNAL_SILO_ID, - params::SiloCreate { - identity: IdentityMetadataCreateParams { - name: "oxide-internal".parse().unwrap(), - description: "Built-in internal Silo.".to_string(), - }, - // The internal silo contains no virtual resources, so it has no allotted capacity. - quotas: params::SiloQuotasCreate::empty(), - discoverable: false, - identity_mode: shared::SiloIdentityMode::LocalOnly, - admin_group_name: None, - tls_certificates: vec![], - mapped_fleet_roles: Default::default(), +/// Built-in Silo to house internal resources. It contains no users and +/// can't be logged into. 
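// [Editor's sketch, not part of this patch] Every hunk in these fixed_data
// files applies the same mechanical rewrite: a `lazy_static!` item becomes a
// plain `static` of type `Lazy<T>`, with the old initializer moved into the
// closure passed to `Lazy::new`. A minimal standalone example of the pattern
// (`EXAMPLE_ID` is a hypothetical placeholder, not a static from this patch):

use once_cell::sync::Lazy;
use uuid::Uuid;

// Before:
//     lazy_static! {
//         pub static ref EXAMPLE_ID: Uuid = "...".parse().unwrap();
//     }
// After: a normal static; the initializer runs lazily inside the closure.
pub static EXAMPLE_ID: Lazy<Uuid> = Lazy::new(|| {
    "001de000-0000-4000-8000-000000000000"
        .parse()
        .expect("invalid uuid for example id")
});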
+pub static INTERNAL_SILO: Lazy = Lazy::new(|| { + db::model::Silo::new_with_id( + *INTERNAL_SILO_ID, + params::SiloCreate { + identity: IdentityMetadataCreateParams { + name: "oxide-internal".parse().unwrap(), + description: "Built-in internal Silo.".to_string(), }, - ).unwrap(); -} + // The internal silo contains no virtual resources, so it has no allotted capacity. + quotas: params::SiloQuotasCreate::empty(), + discoverable: false, + identity_mode: shared::SiloIdentityMode::LocalOnly, + admin_group_name: None, + tls_certificates: vec![], + mapped_fleet_roles: Default::default(), + }, + ) + .unwrap() +}); diff --git a/nexus/db-queries/src/db/fixed_data/silo_user.rs b/nexus/db-queries/src/db/fixed_data/silo_user.rs index d54bcfa59f..b5253b68e3 100644 --- a/nexus/db-queries/src/db/fixed_data/silo_user.rs +++ b/nexus/db-queries/src/db/fixed_data/silo_user.rs @@ -6,25 +6,26 @@ use super::role_builtin; use crate::db; use crate::db::identity::Asset; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; -lazy_static! { - /// Test user that's granted all privileges, used for automated testing and - /// local development - // TODO-security Once we have a way to bootstrap the initial Silo with the - // initial privileged user, this user should be created in the test suite, - // not automatically at Nexus startup. See omicron#2305. - pub static ref USER_TEST_PRIVILEGED: db::model::SiloUser = - db::model::SiloUser::new( - *db::fixed_data::silo::SILO_ID, - // "4007" looks a bit like "root". - "001de000-05e4-4000-8000-000000004007".parse().unwrap(), - "privileged".into(), - ); +/// Test user that's granted all privileges, used for automated testing and +/// local development +// TODO-security Once we have a way to bootstrap the initial Silo with the +// initial privileged user, this user should be created in the test suite, +// not automatically at Nexus startup. See omicron#2305. +pub static USER_TEST_PRIVILEGED: Lazy = Lazy::new(|| { + db::model::SiloUser::new( + *db::fixed_data::silo::SILO_ID, + // "4007" looks a bit like "root". + "001de000-05e4-4000-8000-000000004007".parse().unwrap(), + "privileged".into(), + ) +}); - /// Role assignments needed for the privileged user - pub static ref ROLE_ASSIGNMENTS_PRIVILEGED: - Vec = vec![ +/// Role assignments needed for the privileged user +pub static ROLE_ASSIGNMENTS_PRIVILEGED: Lazy> = + Lazy::new(|| { + vec![ // The "test-privileged" user gets the "admin" role on the sole // Fleet as well as the default Silo. db::model::RoleAssignment::new( @@ -34,7 +35,6 @@ lazy_static! { *db::fixed_data::FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), - db::model::RoleAssignment::new( db::model::IdentityType::SiloUser, USER_TEST_PRIVILEGED.id(), @@ -42,20 +42,22 @@ lazy_static! { *db::fixed_data::silo::SILO_ID, role_builtin::SILO_ADMIN.role_name, ), - ]; + ] + }); - /// Test user that's granted no privileges, used for automated testing - // TODO-security Once we have a way to bootstrap the initial Silo with the - // initial privileged user, this user should be created in the test suite, - // not automatically at Nexus startup. See omicron#2305. - pub static ref USER_TEST_UNPRIVILEGED: db::model::SiloUser = +/// Test user that's granted no privileges, used for automated testing +// TODO-security Once we have a way to bootstrap the initial Silo with the +// initial privileged user, this user should be created in the test suite, +// not automatically at Nexus startup. See omicron#2305. 
+pub static USER_TEST_UNPRIVILEGED: Lazy = + Lazy::new(|| { db::model::SiloUser::new( *db::fixed_data::silo::SILO_ID, // 60001 is the decimal uid for "nobody" on Helios. "001de000-05e4-4000-8000-000000060001".parse().unwrap(), "unprivileged".into(), - ); -} + ) + }); #[cfg(test)] mod test { diff --git a/nexus/db-queries/src/db/fixed_data/user_builtin.rs b/nexus/db-queries/src/db/fixed_data/user_builtin.rs index 87f33fa355..1e96802683 100644 --- a/nexus/db-queries/src/db/fixed_data/user_builtin.rs +++ b/nexus/db-queries/src/db/fixed_data/user_builtin.rs @@ -3,8 +3,8 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Built-in users -use lazy_static::lazy_static; use omicron_common::api; +use once_cell::sync::Lazy; use uuid::Uuid; pub struct UserBuiltinConfig { @@ -27,61 +27,65 @@ impl UserBuiltinConfig { } } -lazy_static! { - /// Internal user used for seeding initial database data - // NOTE: This uuid and name are duplicated in dbinit.sql. - pub static ref USER_DB_INIT: UserBuiltinConfig = - UserBuiltinConfig::new_static( - // "0001" is the first possible user that wouldn't be confused with - // 0, or root. - "001de000-05e4-4000-8000-000000000001", - "db-init", - "used for seeding initial database data", - ); +/// Internal user used for seeding initial database data +// NOTE: This uuid and name are duplicated in dbinit.sql. +pub static USER_DB_INIT: Lazy = Lazy::new(|| { + UserBuiltinConfig::new_static( + // "0001" is the first possible user that wouldn't be confused with + // 0, or root. + "001de000-05e4-4000-8000-000000000001", + "db-init", + "used for seeding initial database data", + ) +}); - /// Internal user for performing operations to manage the - /// provisioning of services across the fleet. - pub static ref USER_SERVICE_BALANCER: UserBuiltinConfig = - UserBuiltinConfig::new_static( - "001de000-05e4-4000-8000-00000000bac3", - "service-balancer", - "used for Nexus-driven service balancing", - ); +/// Internal user for performing operations to manage the +/// provisioning of services across the fleet. 
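// [Editor's sketch, not part of this patch] Call sites mostly survive the
// migration unchanged because `Lazy<T>`, like the hidden type `lazy_static!`
// generates, implements `Deref<Target = T>`. That is also why the
// silo_user.rs datastore hunk earlier could simplify `&*authn::USER_X` to
// plain `&authn::USER_X`: field and method access auto-deref through `Lazy`.
// A hypothetical illustration (`ExampleUser` is not a type from this patch):

use once_cell::sync::Lazy;

pub struct ExampleUser {
    pub name: &'static str,
}

pub static EXAMPLE_USER: Lazy<ExampleUser> =
    Lazy::new(|| ExampleUser { name: "example" });

fn demo() {
    // Explicit deref still works, as at the `*FLEET_ID` and `*SILO_ID` sites:
    let by_deref: &ExampleUser = &*EXAMPLE_USER;
    // Field access auto-derefs through `Lazy`, so no `&*` is needed here:
    assert_eq!(by_deref.name, EXAMPLE_USER.name);
}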
+pub static USER_SERVICE_BALANCER: Lazy = Lazy::new(|| { + UserBuiltinConfig::new_static( + "001de000-05e4-4000-8000-00000000bac3", + "service-balancer", + "used for Nexus-driven service balancing", + ) +}); - /// Internal user used by Nexus when handling internal API requests - pub static ref USER_INTERNAL_API: UserBuiltinConfig = - UserBuiltinConfig::new_static( - "001de000-05e4-4000-8000-000000000002", - "internal-api", - "used by Nexus when handling internal API requests", - ); +/// Internal user used by Nexus when handling internal API requests +pub static USER_INTERNAL_API: Lazy = Lazy::new(|| { + UserBuiltinConfig::new_static( + "001de000-05e4-4000-8000-000000000002", + "internal-api", + "used by Nexus when handling internal API requests", + ) +}); - /// Internal user used by Nexus to read privileged control plane data - pub static ref USER_INTERNAL_READ: UserBuiltinConfig = - UserBuiltinConfig::new_static( - // "4ead" looks like "read" - "001de000-05e4-4000-8000-000000004ead", - "internal-read", - "used by Nexus to read privileged control plane data", - ); +/// Internal user used by Nexus to read privileged control plane data +pub static USER_INTERNAL_READ: Lazy = Lazy::new(|| { + UserBuiltinConfig::new_static( + // "4ead" looks like "read" + "001de000-05e4-4000-8000-000000004ead", + "internal-read", + "used by Nexus to read privileged control plane data", + ) +}); - /// Internal user used by Nexus when recovering sagas - pub static ref USER_SAGA_RECOVERY: UserBuiltinConfig = - UserBuiltinConfig::new_static( - // "3a8a" looks a bit like "saga". - "001de000-05e4-4000-8000-000000003a8a", - "saga-recovery", - "used by Nexus when recovering sagas", - ); +/// Internal user used by Nexus when recovering sagas +pub static USER_SAGA_RECOVERY: Lazy = Lazy::new(|| { + UserBuiltinConfig::new_static( + // "3a8a" looks a bit like "saga". + "001de000-05e4-4000-8000-000000003a8a", + "saga-recovery", + "used by Nexus when recovering sagas", + ) +}); - /// Internal user used by Nexus when authenticating external requests - pub static ref USER_EXTERNAL_AUTHN: UserBuiltinConfig = - UserBuiltinConfig::new_static( - "001de000-05e4-4000-8000-000000000003", - "external-authn", - "used by Nexus when authenticating external requests", - ); -} +/// Internal user used by Nexus when authenticating external requests +pub static USER_EXTERNAL_AUTHN: Lazy = Lazy::new(|| { + UserBuiltinConfig::new_static( + "001de000-05e4-4000-8000-000000000003", + "external-authn", + "used by Nexus when authenticating external requests", + ) +}); #[cfg(test)] mod test { diff --git a/nexus/db-queries/src/db/fixed_data/vpc.rs b/nexus/db-queries/src/db/fixed_data/vpc.rs index 6571e5c5f9..c71b655ddc 100644 --- a/nexus/db-queries/src/db/fixed_data/vpc.rs +++ b/nexus/db-queries/src/db/fixed_data/vpc.rs @@ -4,31 +4,35 @@ use crate::db; use crate::db::datastore::SERVICES_DB_NAME; -use lazy_static::lazy_static; use nexus_types::external_api::params; use omicron_common::address::SERVICE_VPC_IPV6_PREFIX; use omicron_common::api::external::IdentityMetadataCreateParams; +use once_cell::sync::Lazy; -lazy_static! { - /// UUID of built-in VPC for internal services on the rack. - pub static ref SERVICES_VPC_ID: uuid::Uuid = "001de000-074c-4000-8000-000000000000" +/// UUID of built-in VPC for internal services on the rack. 
+pub static SERVICES_VPC_ID: Lazy = Lazy::new(|| { + "001de000-074c-4000-8000-000000000000" .parse() - .expect("invalid uuid for builtin services vpc id"); + .expect("invalid uuid for builtin services vpc id") +}); - /// UUID of VpcRouter for built-in Services VPC. - pub static ref SERVICES_VPC_ROUTER_ID: uuid::Uuid = - "001de000-074c-4000-8000-000000000001" - .parse() - .expect("invalid uuid for builtin services vpc router id"); +/// UUID of VpcRouter for built-in Services VPC. +pub static SERVICES_VPC_ROUTER_ID: Lazy = Lazy::new(|| { + "001de000-074c-4000-8000-000000000001" + .parse() + .expect("invalid uuid for builtin services vpc router id") +}); - /// UUID of default route for built-in Services VPC. - pub static ref SERVICES_VPC_DEFAULT_ROUTE_ID: uuid::Uuid = - "001de000-074c-4000-8000-000000000002" - .parse() - .expect("invalid uuid for builtin services vpc default route id"); +/// UUID of default route for built-in Services VPC. +pub static SERVICES_VPC_DEFAULT_ROUTE_ID: Lazy = Lazy::new(|| { + "001de000-074c-4000-8000-000000000002" + .parse() + .expect("invalid uuid for builtin services vpc default route id") +}); - /// Built-in VPC for internal services on the rack. - pub static ref SERVICES_VPC: db::model::IncompleteVpc = db::model::IncompleteVpc::new( +/// Built-in VPC for internal services on the rack. +pub static SERVICES_VPC: Lazy = Lazy::new(|| { + db::model::IncompleteVpc::new( *SERVICES_VPC_ID, *super::project::SERVICES_PROJECT_ID, *SERVICES_VPC_ROUTER_ID, @@ -43,5 +47,5 @@ lazy_static! { ) // `IncompleteVpc::new` only fails if given an invalid `ipv6_prefix` // but we know `SERVICE_VPC_IPV6_PREFIX` is valid. - .unwrap(); -} + .unwrap() +}); diff --git a/nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs b/nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs index 3fae24abee..5062b1a11c 100644 --- a/nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs +++ b/nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs @@ -2,72 +2,63 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use lazy_static::lazy_static; use nexus_types::identity::Resource; use omicron_common::api::external::{ L4PortRange, VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRuleFilter, VpcFirewallRulePriority, VpcFirewallRuleProtocol, VpcFirewallRuleStatus, VpcFirewallRuleTarget, VpcFirewallRuleUpdate, }; +use once_cell::sync::Lazy; -lazy_static! { - /// Built-in VPC firewall rule for External DNS. - pub static ref DNS_VPC_FW_RULE: VpcFirewallRuleUpdate = VpcFirewallRuleUpdate { +/// Built-in VPC firewall rule for External DNS. 
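// [Editor's sketch, not part of this patch] The runtime semantics should be
// preserved by this conversion as well: `once_cell::sync::Lazy` wraps a
// `OnceCell`, so the closure runs at most once, on first access, even when
// reached from multiple threads, and later accesses return the cached value.
// A small self-contained check of that behavior:

use once_cell::sync::Lazy;
use std::sync::atomic::{AtomicU32, Ordering};

static INIT_CALLS: AtomicU32 = AtomicU32::new(0);
static COMPUTED: Lazy<String> = Lazy::new(|| {
    INIT_CALLS.fetch_add(1, Ordering::SeqCst);
    "computed once".to_string()
});

fn main() {
    assert_eq!(&*COMPUTED, "computed once");
    assert_eq!(&*COMPUTED, "computed once");
    // The initializer ran exactly once despite two accesses.
    assert_eq!(INIT_CALLS.load(Ordering::SeqCst), 1);
}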
+pub static DNS_VPC_FW_RULE: Lazy = + Lazy::new(|| VpcFirewallRuleUpdate { name: "external-dns-inbound".parse().unwrap(), description: "allow inbound connections for DNS from anywhere" .to_string(), status: VpcFirewallRuleStatus::Enabled, direction: VpcFirewallRuleDirection::Inbound, - targets: vec![ - VpcFirewallRuleTarget::Subnet( - super::vpc_subnet::DNS_VPC_SUBNET.name().clone(), - ), - ], + targets: vec![VpcFirewallRuleTarget::Subnet( + super::vpc_subnet::DNS_VPC_SUBNET.name().clone(), + )], filters: VpcFirewallRuleFilter { hosts: None, protocols: Some(vec![VpcFirewallRuleProtocol::Udp]), - ports: Some( - vec![ - L4PortRange { - first: 53.try_into().unwrap(), - last: 53.try_into().unwrap(), - }, - ], - ), + ports: Some(vec![L4PortRange { + first: 53.try_into().unwrap(), + last: 53.try_into().unwrap(), + }]), }, action: VpcFirewallRuleAction::Allow, priority: VpcFirewallRulePriority(65534), - }; + }); - /// Built-in VPC firewall rule for Nexus. - pub static ref NEXUS_VPC_FW_RULE: VpcFirewallRuleUpdate = VpcFirewallRuleUpdate { +/// Built-in VPC firewall rule for Nexus. +pub static NEXUS_VPC_FW_RULE: Lazy = + Lazy::new(|| VpcFirewallRuleUpdate { name: "nexus-inbound".parse().unwrap(), - description: "allow inbound connections for console & api from anywhere" - .to_string(), + description: + "allow inbound connections for console & api from anywhere" + .to_string(), status: VpcFirewallRuleStatus::Enabled, direction: VpcFirewallRuleDirection::Inbound, - targets: vec![ - VpcFirewallRuleTarget::Subnet( - super::vpc_subnet::NEXUS_VPC_SUBNET.name().clone(), - ), - ], + targets: vec![VpcFirewallRuleTarget::Subnet( + super::vpc_subnet::NEXUS_VPC_SUBNET.name().clone(), + )], filters: VpcFirewallRuleFilter { hosts: None, protocols: Some(vec![VpcFirewallRuleProtocol::Tcp]), - ports: Some( - vec![ - L4PortRange { - first: 80.try_into().unwrap(), - last: 80.try_into().unwrap(), - }, - L4PortRange { - first: 443.try_into().unwrap(), - last: 443.try_into().unwrap(), - }, - ], - ), + ports: Some(vec![ + L4PortRange { + first: 80.try_into().unwrap(), + last: 80.try_into().unwrap(), + }, + L4PortRange { + first: 443.try_into().unwrap(), + last: 443.try_into().unwrap(), + }, + ]), }, action: VpcFirewallRuleAction::Allow, priority: VpcFirewallRulePriority(65534), - }; -} + }); diff --git a/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs b/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs index 59bc87b34c..c42d4121c9 100644 --- a/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs +++ b/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs @@ -3,32 +3,37 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use crate::db::model::VpcSubnet; -use lazy_static::lazy_static; use omicron_common::address::{ DNS_OPTE_IPV4_SUBNET, DNS_OPTE_IPV6_SUBNET, NEXUS_OPTE_IPV4_SUBNET, NEXUS_OPTE_IPV6_SUBNET, NTP_OPTE_IPV4_SUBNET, NTP_OPTE_IPV6_SUBNET, }; use omicron_common::api::external::IdentityMetadataCreateParams; +use once_cell::sync::Lazy; -lazy_static! { - /// UUID of built-in VPC Subnet for External DNS. - pub static ref DNS_VPC_SUBNET_ID: uuid::Uuid = "001de000-c470-4000-8000-000000000001" +/// UUID of built-in VPC Subnet for External DNS. +pub static DNS_VPC_SUBNET_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000001" .parse() - .expect("invalid uuid for builtin external dns vpc subnet id"); + .expect("invalid uuid for builtin external dns vpc subnet id") +}); - /// UUID of built-in VPC Subnet for Nexus. 
- pub static ref NEXUS_VPC_SUBNET_ID: uuid::Uuid = "001de000-c470-4000-8000-000000000002" +/// UUID of built-in VPC Subnet for Nexus. +pub static NEXUS_VPC_SUBNET_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000002" .parse() - .expect("invalid uuid for builtin nexus vpc subnet id"); + .expect("invalid uuid for builtin nexus vpc subnet id") +}); - /// UUID of built-in VPC Subnet for Boundary NTP. - pub static ref NTP_VPC_SUBNET_ID: uuid::Uuid = "001de000-c470-4000-8000-000000000003" +/// UUID of built-in VPC Subnet for Boundary NTP. +pub static NTP_VPC_SUBNET_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000003" .parse() - .expect("invalid uuid for builtin boundary ntp vpc subnet id"); + .expect("invalid uuid for builtin boundary ntp vpc subnet id") +}); - - /// Built-in VPC Subnet for External DNS. - pub static ref DNS_VPC_SUBNET: VpcSubnet = VpcSubnet::new( +/// Built-in VPC Subnet for External DNS. +pub static DNS_VPC_SUBNET: Lazy = Lazy::new(|| { + VpcSubnet::new( *DNS_VPC_SUBNET_ID, *super::vpc::SERVICES_VPC_ID, IdentityMetadataCreateParams { @@ -38,10 +43,12 @@ lazy_static! { }, *DNS_OPTE_IPV4_SUBNET, *DNS_OPTE_IPV6_SUBNET, - ); + ) +}); - /// Built-in VPC Subnet for Nexus. - pub static ref NEXUS_VPC_SUBNET: VpcSubnet = VpcSubnet::new( +/// Built-in VPC Subnet for Nexus. +pub static NEXUS_VPC_SUBNET: Lazy = Lazy::new(|| { + VpcSubnet::new( *NEXUS_VPC_SUBNET_ID, *super::vpc::SERVICES_VPC_ID, IdentityMetadataCreateParams { @@ -51,10 +58,12 @@ lazy_static! { }, *NEXUS_OPTE_IPV4_SUBNET, *NEXUS_OPTE_IPV6_SUBNET, - ); + ) +}); - /// Built-in VPC Subnet for Boundary NTP. - pub static ref NTP_VPC_SUBNET: VpcSubnet = VpcSubnet::new( +/// Built-in VPC Subnet for Boundary NTP. +pub static NTP_VPC_SUBNET: Lazy = Lazy::new(|| { + VpcSubnet::new( *NTP_VPC_SUBNET_ID, *super::vpc::SERVICES_VPC_ID, IdentityMetadataCreateParams { @@ -64,5 +73,5 @@ lazy_static! { }, *NTP_OPTE_IPV4_SUBNET, *NTP_OPTE_IPV6_SUBNET, - ); -} + ) +}); diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 1dbe57da6f..6d00b4bc29 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -30,6 +30,7 @@ use nexus_db_model::NetworkInterfaceKindEnum; use omicron_common::api::external; use omicron_common::api::external::MacAddr; use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; +use once_cell::sync::Lazy; use std::net::IpAddr; use uuid::Uuid; @@ -42,36 +43,35 @@ pub(crate) const MAX_NICS: usize = 8; // These are sentinel values and other constants used to verify the state of the // system when operating on network interfaces -lazy_static::lazy_static! { - // States an instance must be in to operate on its network interfaces, in - // most situations. - static ref INSTANCE_STOPPED: db::model::InstanceState = - db::model::InstanceState(external::InstanceState::Stopped); - - static ref INSTANCE_FAILED: db::model::InstanceState = - db::model::InstanceState(external::InstanceState::Failed); - - // An instance can be in the creating state while we manipulate its - // interfaces. The intention is for this only to be the case during sagas. - static ref INSTANCE_CREATING: db::model::InstanceState = - db::model::InstanceState(external::InstanceState::Creating); - - // A sentinel value for the instance state when the instance actually does - // not exist. 
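// [Editor's sketch, not part of this patch] Not every static needs `Lazy`.
// The sentinel strings in this network_interface.rs hunk stay `Lazy<String>`
// because `String::from` cannot run in const context, and the same split
// shows up in the endpoints.rs changes further below: URLs built with
// `format!` remain `Lazy<String>`, while fixed literals become plain
// `&'static str` consts with no runtime initialization at all. A hypothetical
// pair (`EXAMPLE_SLED_ID` stands in for values such as SLED_AGENT_UUID):

use once_cell::sync::Lazy;

// Known at compile time: a const needs no initialization code.
pub const POLICY_URL: &'static str = "/v1/system/policy";

// Computed at runtime with `format!`, so it stays behind a `Lazy`.
pub const EXAMPLE_SLED_ID: &'static str =
    "0de4b299-e0b4-46f0-d528-85de81a7095f";
pub static SLED_URL: Lazy<String> =
    Lazy::new(|| format!("/v1/system/hardware/sleds/{}", EXAMPLE_SLED_ID));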
- static ref INSTANCE_DESTROYED: db::model::InstanceState = - db::model::InstanceState(external::InstanceState::Destroyed); - - // A sentinel value for the instance state when the instance has an active - // VMM, irrespective of that VMM's actual state. - static ref INSTANCE_RUNNING: db::model::InstanceState = - db::model::InstanceState(external::InstanceState::Running); - - static ref NO_INSTANCE_SENTINEL_STRING: String = - String::from(NO_INSTANCE_SENTINEL); - - static ref INSTANCE_BAD_STATE_SENTINEL_STRING: String = - String::from(INSTANCE_BAD_STATE_SENTINEL); -} + +// States an instance must be in to operate on its network interfaces, in +// most situations. +static INSTANCE_STOPPED: Lazy = + Lazy::new(|| db::model::InstanceState(external::InstanceState::Stopped)); + +static INSTANCE_FAILED: Lazy = + Lazy::new(|| db::model::InstanceState(external::InstanceState::Failed)); + +// An instance can be in the creating state while we manipulate its +// interfaces. The intention is for this only to be the case during sagas. +static INSTANCE_CREATING: Lazy = + Lazy::new(|| db::model::InstanceState(external::InstanceState::Creating)); + +// A sentinel value for the instance state when the instance actually does +// not exist. +static INSTANCE_DESTROYED: Lazy = + Lazy::new(|| db::model::InstanceState(external::InstanceState::Destroyed)); + +// A sentinel value for the instance state when the instance has an active +// VMM, irrespective of that VMM's actual state. +static INSTANCE_RUNNING: Lazy = + Lazy::new(|| db::model::InstanceState(external::InstanceState::Running)); + +static NO_INSTANCE_SENTINEL_STRING: Lazy = + Lazy::new(|| String::from(NO_INSTANCE_SENTINEL)); + +static INSTANCE_BAD_STATE_SENTINEL_STRING: Lazy = + Lazy::new(|| String::from(INSTANCE_BAD_STATE_SENTINEL)); // Uncastable sentinel used to detect when an instance exists, but is not // in the right state to have its network interfaces altered diff --git a/nexus/db-queries/src/db/saga_recovery.rs b/nexus/db-queries/src/db/saga_recovery.rs index 802093b889..55cda03c3c 100644 --- a/nexus/db-queries/src/db/saga_recovery.rs +++ b/nexus/db-queries/src/db/saga_recovery.rs @@ -305,9 +305,9 @@ mod test { use super::*; use crate::context::OpContext; use crate::db::test_utils::UnpluggableCockroachDbSecStore; - use lazy_static::lazy_static; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; + use once_cell::sync::Lazy; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use steno::{ new_action_noop_undo, Action, ActionContext, ActionError, @@ -376,12 +376,10 @@ mod test { type ExecContextType = TestContext; } - lazy_static! 
{ - static ref ACTION_N1: Arc> = - new_action_noop_undo("n1_action", node_one); - static ref ACTION_N2: Arc> = - new_action_noop_undo("n2_action", node_two); - } + static ACTION_N1: Lazy>> = + Lazy::new(|| new_action_noop_undo("n1_action", node_one)); + static ACTION_N2: Lazy>> = + Lazy::new(|| new_action_noop_undo("n2_action", node_two)); fn registry_create() -> Arc> { let mut registry = ActionRegistry::new(); diff --git a/nexus/defaults/Cargo.toml b/nexus/defaults/Cargo.toml index 0724b5bf4d..535b78054b 100644 --- a/nexus/defaults/Cargo.toml +++ b/nexus/defaults/Cargo.toml @@ -6,7 +6,7 @@ license = "MPL-2.0" [dependencies] ipnetwork.workspace = true -lazy_static.workspace = true +once_cell.workspace = true rand.workspace = true serde_json.workspace = true diff --git a/nexus/defaults/src/lib.rs b/nexus/defaults/src/lib.rs index be1ce2193c..dd08b4e4ab 100644 --- a/nexus/defaults/src/lib.rs +++ b/nexus/defaults/src/lib.rs @@ -6,10 +6,10 @@ use ipnetwork::Ipv4Network; use ipnetwork::Ipv6Network; -use lazy_static::lazy_static; use omicron_common::api::external; use omicron_common::api::external::Ipv4Net; use omicron_common::api::external::Ipv6Net; +use once_cell::sync::Lazy; use std::net::Ipv4Addr; use std::net::Ipv6Addr; @@ -17,51 +17,51 @@ use std::net::Ipv6Addr; /// instance. pub const DEFAULT_PRIMARY_NIC_NAME: &str = "net0"; -lazy_static! { - /// The default IPv4 subnet range assigned to the default VPC Subnet, when - /// the VPC is created, if one is not provided in the request. See - /// for details. - pub static ref DEFAULT_VPC_SUBNET_IPV4_BLOCK: external::Ipv4Net = - Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 0, 0), 22).unwrap()); -} +/// The default IPv4 subnet range assigned to the default VPC Subnet, when +/// the VPC is created, if one is not provided in the request. See +/// for details. +pub static DEFAULT_VPC_SUBNET_IPV4_BLOCK: Lazy = + Lazy::new(|| { + Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 0, 0), 22).unwrap()) + }); -lazy_static! 
{ - pub static ref DEFAULT_FIREWALL_RULES: external::VpcFirewallRuleUpdateParams = +pub static DEFAULT_FIREWALL_RULES: Lazy = + Lazy::new(|| { serde_json::from_str(r#"{ - "rules": [ - { - "name": "allow-internal-inbound", - "status": "enabled", - "direction": "inbound", - "targets": [ { "type": "vpc", "value": "default" } ], - "filters": { "hosts": [ { "type": "vpc", "value": "default" } ] }, - "action": "allow", - "priority": 65534, - "description": "allow inbound traffic to all instances within the VPC if originated within the VPC" - }, - { - "name": "allow-ssh", - "status": "enabled", - "direction": "inbound", - "targets": [ { "type": "vpc", "value": "default" } ], - "filters": { "ports": [ "22" ], "protocols": [ "TCP" ] }, - "action": "allow", - "priority": 65534, - "description": "allow inbound TCP connections on port 22 from anywhere" - }, - { - "name": "allow-icmp", - "status": "enabled", - "direction": "inbound", - "targets": [ { "type": "vpc", "value": "default" } ], - "filters": { "protocols": [ "ICMP" ] }, - "action": "allow", - "priority": 65534, - "description": "allow inbound ICMP traffic from anywhere" - } - ] - }"#).unwrap(); -} + "rules": [ + { + "name": "allow-internal-inbound", + "status": "enabled", + "direction": "inbound", + "targets": [ { "type": "vpc", "value": "default" } ], + "filters": { "hosts": [ { "type": "vpc", "value": "default" } ] }, + "action": "allow", + "priority": 65534, + "description": "allow inbound traffic to all instances within the VPC if originated within the VPC" + }, + { + "name": "allow-ssh", + "status": "enabled", + "direction": "inbound", + "targets": [ { "type": "vpc", "value": "default" } ], + "filters": { "ports": [ "22" ], "protocols": [ "TCP" ] }, + "action": "allow", + "priority": 65534, + "description": "allow inbound TCP connections on port 22 from anywhere" + }, + { + "name": "allow-icmp", + "status": "enabled", + "direction": "inbound", + "targets": [ { "type": "vpc", "value": "default" } ], + "filters": { "protocols": [ "ICMP" ] }, + "action": "allow", + "priority": 65534, + "description": "allow inbound ICMP traffic from anywhere" + } + ] + }"#).unwrap() + }); /// Generate a random VPC IPv6 prefix, in the range `fd00::/48`. pub fn random_vpc_ipv6_prefix() -> Result { diff --git a/nexus/src/external_api/console_api.rs b/nexus/src/external_api/console_api.rs index d779d34459..90450c3145 100644 --- a/nexus/src/external_api/console_api.rs +++ b/nexus/src/external_api/console_api.rs @@ -17,7 +17,6 @@ use dropshot::{ }; use http::{header, Response, StatusCode, Uri}; use hyper::Body; -use lazy_static::lazy_static; use mime_guess; use nexus_db_model::AuthenticationMode; use nexus_db_queries::authn::silos::IdentityProviderType; @@ -36,6 +35,7 @@ use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{DataPageParams, Error, NameOrId}; +use once_cell::sync::Lazy; use parse_display::Display; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -810,15 +810,15 @@ fn not_found(internal_msg: &str) -> HttpError { HttpError::for_not_found(None, internal_msg.to_string()) } -lazy_static! 
{ - static ref ALLOWED_EXTENSIONS: HashSet = HashSet::from( +static ALLOWED_EXTENSIONS: Lazy> = Lazy::new(|| { + HashSet::from( [ "js", "css", "html", "ico", "map", "otf", "png", "svg", "ttf", "txt", "webp", "woff", "woff2", ] - .map(|s| OsString::from(s)) - ); -} + .map(|s| OsString::from(s)), + ) +}); /// Starting from `root_dir`, follow the segments of `path` down the file tree /// until we find a file (or not). Do not follow symlinks. diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 545129d567..be0ea2a3f5 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -11,7 +11,6 @@ use crate::integration_tests::unauthorized::HTTP_SERVER; use chrono::Utc; use http::method::Method; use internal_dns::names::DNS_ZONE_EXTERNAL_TESTING; -use lazy_static::lazy_static; use nexus_db_queries::authn; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::identity::Resource; @@ -38,225 +37,264 @@ use omicron_common::api::external::RouteTarget; use omicron_common::api::external::SemverVersion; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_test_utils::certificates::CertificateChain; +use once_cell::sync::Lazy; use std::net::IpAddr; use std::net::Ipv4Addr; use std::str::FromStr; use uuid::Uuid; -lazy_static! { - pub static ref HARDWARE_RACK_URL: String = - format!("/v1/system/hardware/racks/{}", RACK_UUID); - pub static ref HARDWARE_UNINITIALIZED_SLEDS: String = - format!("/v1/system/hardware/sleds-uninitialized"); - pub static ref HARDWARE_SLED_URL: String = - format!("/v1/system/hardware/sleds/{}", SLED_AGENT_UUID); - pub static ref HARDWARE_SLED_PROVISION_STATE_URL: String = - format!("/v1/system/hardware/sleds/{}/provision-state", SLED_AGENT_UUID); - pub static ref DEMO_SLED_PROVISION_STATE: params::SledProvisionStateParams = +pub static HARDWARE_RACK_URL: Lazy = + Lazy::new(|| format!("/v1/system/hardware/racks/{}", RACK_UUID)); +pub const HARDWARE_UNINITIALIZED_SLEDS: &'static str = + "/v1/system/hardware/sleds-uninitialized"; +pub static HARDWARE_SLED_URL: Lazy = + Lazy::new(|| format!("/v1/system/hardware/sleds/{}", SLED_AGENT_UUID)); +pub static HARDWARE_SLED_PROVISION_STATE_URL: Lazy = Lazy::new(|| { + format!("/v1/system/hardware/sleds/{}/provision-state", SLED_AGENT_UUID) +}); +pub static DEMO_SLED_PROVISION_STATE: Lazy = + Lazy::new(|| { params::SledProvisionStateParams { state: nexus_types::external_api::views::SledProvisionState::NonProvisionable, - }; - pub static ref HARDWARE_SWITCH_URL: String = - format!("/v1/system/hardware/switches/{}", SWITCH_UUID); - pub static ref HARDWARE_DISK_URL: String = - format!("/v1/system/hardware/disks"); - pub static ref HARDWARE_SLED_DISK_URL: String = - format!("/v1/system/hardware/sleds/{}/disks", SLED_AGENT_UUID); - - pub static ref SLED_INSTANCES_URL: String = - format!("/v1/system/hardware/sleds/{}/instances", SLED_AGENT_UUID); - - pub static ref DEMO_UNINITIALIZED_SLED: UninitializedSled = UninitializedSled { + } + }); + +pub static HARDWARE_SWITCH_URL: Lazy = + Lazy::new(|| format!("/v1/system/hardware/switches/{}", SWITCH_UUID)); +pub const HARDWARE_DISK_URL: &'static str = "/v1/system/hardware/disks"; +pub static HARDWARE_SLED_DISK_URL: Lazy = Lazy::new(|| { + format!("/v1/system/hardware/sleds/{}/disks", SLED_AGENT_UUID) +}); + +pub static SLED_INSTANCES_URL: Lazy = Lazy::new(|| { + format!("/v1/system/hardware/sleds/{}/instances", SLED_AGENT_UUID) +}); +pub static 
DEMO_UNINITIALIZED_SLED: Lazy = + Lazy::new(|| UninitializedSled { baseboard: Baseboard { serial: "demo-serial".to_string(), part: "demo-part".to_string(), - revision: 6 + revision: 6, }, rack_id: Uuid::new_v4(), - cubby: 1 - }; - - // Global policy - pub static ref SYSTEM_POLICY_URL: &'static str = "/v1/system/policy"; - - // Silo used for testing - pub static ref DEMO_SILO_NAME: Name = "demo-silo".parse().unwrap(); - pub static ref DEMO_SILO_URL: String = - format!("/v1/system/silos/{}", *DEMO_SILO_NAME); - pub static ref DEMO_SILO_POLICY_URL: String = - format!("/v1/system/silos/{}/policy", *DEMO_SILO_NAME); - pub static ref DEMO_SILO_QUOTAS_URL: String = - format!("/v1/system/silos/{}/quotas", *DEMO_SILO_NAME); - pub static ref DEMO_SILO_CREATE: params::SiloCreate = - params::SiloCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_SILO_NAME.clone(), - description: String::from(""), - }, - quotas: params::SiloQuotasCreate::arbitrarily_high_default(), - discoverable: true, - identity_mode: shared::SiloIdentityMode::SamlJit, - admin_group_name: None, - tls_certificates: vec![], - mapped_fleet_roles: Default::default(), - }; - - pub static ref DEMO_SILO_UTIL_URL: String = format!("/v1/system/utilization/silos/{}", *DEMO_SILO_NAME); - - // Use the default Silo for testing the local IdP - pub static ref DEMO_SILO_USERS_CREATE_URL: String = format!( + cubby: 1, + }); + +// Global policy +pub const SYSTEM_POLICY_URL: &'static str = "/v1/system/policy"; + +// Silo used for testing +pub static DEMO_SILO_NAME: Lazy = + Lazy::new(|| "demo-silo".parse().unwrap()); +pub static DEMO_SILO_URL: Lazy = + Lazy::new(|| format!("/v1/system/silos/{}", *DEMO_SILO_NAME)); +pub static DEMO_SILO_POLICY_URL: Lazy = + Lazy::new(|| format!("/v1/system/silos/{}/policy", *DEMO_SILO_NAME)); +pub static DEMO_SILO_QUOTAS_URL: Lazy = + Lazy::new(|| format!("/v1/system/silos/{}/quotas", *DEMO_SILO_NAME)); +pub static DEMO_SILO_CREATE: Lazy = + Lazy::new(|| params::SiloCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_SILO_NAME.clone(), + description: String::from(""), + }, + quotas: params::SiloQuotasCreate::arbitrarily_high_default(), + discoverable: true, + identity_mode: shared::SiloIdentityMode::SamlJit, + admin_group_name: None, + tls_certificates: vec![], + mapped_fleet_roles: Default::default(), + }); + +pub static DEMO_SILO_UTIL_URL: Lazy = + Lazy::new(|| format!("/v1/system/utilization/silos/{}", *DEMO_SILO_NAME)); + +// Use the default Silo for testing the local IdP +pub static DEMO_SILO_USERS_CREATE_URL: Lazy = Lazy::new(|| { + format!( "/v1/system/identity-providers/local/users?silo={}", DEFAULT_SILO.identity().name, - ); - pub static ref DEMO_SILO_USERS_LIST_URL: String = format!( - "/v1/system/users?silo={}", - DEFAULT_SILO.identity().name, - ); - pub static ref DEMO_SILO_USER_ID_GET_URL: String = format!( - "/v1/system/users/{{id}}?silo={}", - DEFAULT_SILO.identity().name, - ); - pub static ref DEMO_SILO_USER_ID_DELETE_URL: String = format!( + ) +}); +pub static DEMO_SILO_USERS_LIST_URL: Lazy = Lazy::new(|| { + format!("/v1/system/users?silo={}", DEFAULT_SILO.identity().name,) +}); +pub static DEMO_SILO_USER_ID_GET_URL: Lazy = Lazy::new(|| { + format!("/v1/system/users/{{id}}?silo={}", DEFAULT_SILO.identity().name,) +}); +pub static DEMO_SILO_USER_ID_DELETE_URL: Lazy = Lazy::new(|| { + format!( "/v1/system/identity-providers/local/users/{{id}}?silo={}", DEFAULT_SILO.identity().name, - ); - pub static ref DEMO_SILO_USER_ID_SET_PASSWORD_URL: String = format!( + ) +}); +pub static 
DEMO_SILO_USER_ID_SET_PASSWORD_URL: Lazy = Lazy::new(|| { + format!( "/v1/system/identity-providers/local/users/{{id}}/set-password?silo={}", DEFAULT_SILO.identity().name, - ); -} - -lazy_static! { - - // Project used for testing - pub static ref DEMO_PROJECT_NAME: Name = "demo-project".parse().unwrap(); - pub static ref DEMO_PROJECT_URL: String = - format!("/v1/projects/{}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_SELECTOR: String = - format!("project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_POLICY_URL: String = - format!("/v1/projects/{}/policy", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_URL_DISKS: String = - format!("/v1/disks?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_URL_IMAGES: String = - format!("/v1/images?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_URL_INSTANCES: String = format!("/v1/instances?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_URL_SNAPSHOTS: String = format!("/v1/snapshots?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_URL_VPCS: String = format!("/v1/vpcs?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_URL_FIPS: String = format!("/v1/floating-ips?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_CREATE: params::ProjectCreate = - params::ProjectCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_PROJECT_NAME.clone(), - description: String::from(""), - }, - }; - - // VPC used for testing - pub static ref DEMO_VPC_NAME: Name = "demo-vpc".parse().unwrap(); - pub static ref DEMO_VPC_URL: String = - format!("/v1/vpcs/{}?{}", *DEMO_VPC_NAME, *DEMO_PROJECT_SELECTOR); - - pub static ref DEMO_VPC_SELECTOR: String = - format!("project={}&vpc={}", *DEMO_PROJECT_NAME, *DEMO_VPC_NAME); - pub static ref DEMO_VPC_URL_FIREWALL_RULES: String = - format!("/v1/vpc-firewall-rules?{}", *DEMO_VPC_SELECTOR); - pub static ref DEMO_VPC_URL_ROUTERS: String = - format!("/v1/vpc-routers?{}", *DEMO_VPC_SELECTOR); - pub static ref DEMO_VPC_URL_SUBNETS: String = - format!("/v1/vpc-subnets?{}", *DEMO_VPC_SELECTOR); - pub static ref DEMO_VPC_CREATE: params::VpcCreate = - params::VpcCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_VPC_NAME.clone(), - description: String::from(""), - }, - ipv6_prefix: None, - dns_name: DEMO_VPC_NAME.clone(), - }; - - // VPC Subnet used for testing - pub static ref DEMO_VPC_SUBNET_NAME: Name = - "demo-vpc-subnet".parse().unwrap(); - pub static ref DEMO_VPC_SUBNET_URL: String = - format!("/v1/vpc-subnets/{}?{}", *DEMO_VPC_SUBNET_NAME, *DEMO_VPC_SELECTOR); - pub static ref DEMO_VPC_SUBNET_INTERFACES_URL: String = - format!("/v1/vpc-subnets/{}/network-interfaces?{}", *DEMO_VPC_SUBNET_NAME, *DEMO_VPC_SELECTOR); - pub static ref DEMO_VPC_SUBNET_CREATE: params::VpcSubnetCreate = - params::VpcSubnetCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_VPC_SUBNET_NAME.clone(), - description: String::from(""), - }, - ipv4_block: Ipv4Net("10.1.2.3/8".parse().unwrap()), - ipv6_block: None, - }; - - // VPC Router used for testing - pub static ref DEMO_VPC_ROUTER_NAME: Name = - "demo-vpc-router".parse().unwrap(); - pub static ref DEMO_VPC_ROUTER_URL: String = - format!("/v1/vpc-routers/{}?project={}&vpc={}", *DEMO_VPC_ROUTER_NAME, *DEMO_PROJECT_NAME, *DEMO_VPC_NAME); - pub static ref DEMO_VPC_ROUTER_URL_ROUTES: String = - format!("/v1/vpc-router-routes?project={}&vpc={}&router={}", *DEMO_PROJECT_NAME, *DEMO_VPC_NAME, *DEMO_VPC_ROUTER_NAME); - pub static ref DEMO_VPC_ROUTER_CREATE: params::VpcRouterCreate = 
- params::VpcRouterCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_VPC_ROUTER_NAME.clone(), - description: String::from(""), - }, - }; - - // Router Route used for testing - pub static ref DEMO_ROUTER_ROUTE_NAME: Name = - "demo-router-route".parse().unwrap(); - pub static ref DEMO_ROUTER_ROUTE_URL: String = - format!("/v1/vpc-router-routes/{}?project={}&vpc={}&router={}", *DEMO_ROUTER_ROUTE_NAME, *DEMO_PROJECT_NAME, *DEMO_VPC_NAME, *DEMO_VPC_ROUTER_NAME); - pub static ref DEMO_ROUTER_ROUTE_CREATE: params::RouterRouteCreate = - params::RouterRouteCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_ROUTER_ROUTE_NAME.clone(), - description: String::from(""), - }, - target: RouteTarget::Ip(IpAddr::from(Ipv4Addr::new(127, 0, 0, 1))), - destination: RouteDestination::Subnet("loopback".parse().unwrap()), - }; - - // Disk used for testing - pub static ref DEMO_DISK_NAME: Name = "demo-disk".parse().unwrap(); - // TODO: Once we can test a URL multiple times we should also a case to exercise authz for disks filtered by instances - pub static ref DEMO_DISKS_URL: String = - format!("/v1/disks?{}", *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_DISK_URL: String = - format!("/v1/disks/{}?{}", *DEMO_DISK_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_DISK_CREATE: params::DiskCreate = - params::DiskCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_DISK_NAME.clone(), - description: "".parse().unwrap(), - }, - disk_source: params::DiskSource::Blank { - block_size: params::BlockSize::try_from(4096).unwrap(), - }, - size: ByteCount::from_gibibytes_u32( - // divide by at least two to leave space for snapshot blocks - DiskTest::DEFAULT_ZPOOL_SIZE_GIB / 5 - ), - }; - pub static ref DEMO_DISK_METRICS_URL: String = - format!( - "/v1/disks/{}/metrics/activated?start_time={:?}&end_time={:?}&{}", - *DEMO_DISK_NAME, - Utc::now(), - Utc::now(), - *DEMO_PROJECT_SELECTOR, - ); - - // Related to importing blocks from an external source - pub static ref DEMO_IMPORT_DISK_NAME: Name = "demo-import-disk".parse().unwrap(); - pub static ref DEMO_IMPORT_DISK_URL: String = - format!("/v1/disks/{}?{}", *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_IMPORT_DISK_CREATE: params::DiskCreate = + ) +}); + +// Project used for testing +pub static DEMO_PROJECT_NAME: Lazy = + Lazy::new(|| "demo-project".parse().unwrap()); +pub static DEMO_PROJECT_URL: Lazy = + Lazy::new(|| format!("/v1/projects/{}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_SELECTOR: Lazy = + Lazy::new(|| format!("project={}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_POLICY_URL: Lazy = + Lazy::new(|| format!("/v1/projects/{}/policy", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_URL_IMAGES: Lazy = + Lazy::new(|| format!("/v1/images?project={}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_URL_INSTANCES: Lazy = + Lazy::new(|| format!("/v1/instances?project={}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_URL_SNAPSHOTS: Lazy = + Lazy::new(|| format!("/v1/snapshots?project={}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_URL_VPCS: Lazy = + Lazy::new(|| format!("/v1/vpcs?project={}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_URL_FIPS: Lazy = + Lazy::new(|| format!("/v1/floating-ips?project={}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_CREATE: Lazy = + Lazy::new(|| params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_PROJECT_NAME.clone(), + description: String::from(""), + }, + }); + +// VPC used for testing +pub static DEMO_VPC_NAME: Lazy = + 
Lazy::new(|| "demo-vpc".parse().unwrap()); +pub static DEMO_VPC_URL: Lazy = Lazy::new(|| { + format!("/v1/vpcs/{}?{}", *DEMO_VPC_NAME, *DEMO_PROJECT_SELECTOR) +}); +pub static DEMO_VPC_SELECTOR: Lazy = Lazy::new(|| { + format!("project={}&vpc={}", *DEMO_PROJECT_NAME, *DEMO_VPC_NAME) +}); +pub static DEMO_VPC_URL_FIREWALL_RULES: Lazy = + Lazy::new(|| format!("/v1/vpc-firewall-rules?{}", *DEMO_VPC_SELECTOR)); +pub static DEMO_VPC_URL_ROUTERS: Lazy = + Lazy::new(|| format!("/v1/vpc-routers?{}", *DEMO_VPC_SELECTOR)); +pub static DEMO_VPC_URL_SUBNETS: Lazy = + Lazy::new(|| format!("/v1/vpc-subnets?{}", *DEMO_VPC_SELECTOR)); +pub static DEMO_VPC_CREATE: Lazy = + Lazy::new(|| params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_VPC_NAME.clone(), + description: String::from(""), + }, + ipv6_prefix: None, + dns_name: DEMO_VPC_NAME.clone(), + }); + +// VPC Subnet used for testing +pub static DEMO_VPC_SUBNET_NAME: Lazy = + Lazy::new(|| "demo-vpc-subnet".parse().unwrap()); +pub static DEMO_VPC_SUBNET_URL: Lazy = Lazy::new(|| { + format!("/v1/vpc-subnets/{}?{}", *DEMO_VPC_SUBNET_NAME, *DEMO_VPC_SELECTOR) +}); +pub static DEMO_VPC_SUBNET_INTERFACES_URL: Lazy = Lazy::new(|| { + format!( + "/v1/vpc-subnets/{}/network-interfaces?{}", + *DEMO_VPC_SUBNET_NAME, *DEMO_VPC_SELECTOR + ) +}); +pub static DEMO_VPC_SUBNET_CREATE: Lazy = + Lazy::new(|| params::VpcSubnetCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_VPC_SUBNET_NAME.clone(), + description: String::from(""), + }, + ipv4_block: Ipv4Net("10.1.2.3/8".parse().unwrap()), + ipv6_block: None, + }); + +// VPC Router used for testing +pub static DEMO_VPC_ROUTER_NAME: Lazy = + Lazy::new(|| "demo-vpc-router".parse().unwrap()); +pub static DEMO_VPC_ROUTER_URL: Lazy = Lazy::new(|| { + format!( + "/v1/vpc-routers/{}?project={}&vpc={}", + *DEMO_VPC_ROUTER_NAME, *DEMO_PROJECT_NAME, *DEMO_VPC_NAME + ) +}); +pub static DEMO_VPC_ROUTER_URL_ROUTES: Lazy = Lazy::new(|| { + format!( + "/v1/vpc-router-routes?project={}&vpc={}&router={}", + *DEMO_PROJECT_NAME, *DEMO_VPC_NAME, *DEMO_VPC_ROUTER_NAME + ) +}); +pub static DEMO_VPC_ROUTER_CREATE: Lazy = + Lazy::new(|| params::VpcRouterCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_VPC_ROUTER_NAME.clone(), + description: String::from(""), + }, + }); + +// Router Route used for testing +pub static DEMO_ROUTER_ROUTE_NAME: Lazy = + Lazy::new(|| "demo-router-route".parse().unwrap()); +pub static DEMO_ROUTER_ROUTE_URL: Lazy = Lazy::new(|| { + format!( + "/v1/vpc-router-routes/{}?project={}&vpc={}&router={}", + *DEMO_ROUTER_ROUTE_NAME, + *DEMO_PROJECT_NAME, + *DEMO_VPC_NAME, + *DEMO_VPC_ROUTER_NAME + ) +}); +pub static DEMO_ROUTER_ROUTE_CREATE: Lazy = + Lazy::new(|| params::RouterRouteCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_ROUTER_ROUTE_NAME.clone(), + description: String::from(""), + }, + target: RouteTarget::Ip(IpAddr::from(Ipv4Addr::new(127, 0, 0, 1))), + destination: RouteDestination::Subnet("loopback".parse().unwrap()), + }); + +// Disk used for testing +pub static DEMO_DISK_NAME: Lazy = + Lazy::new(|| "demo-disk".parse().unwrap()); + +// TODO: Once we can test a URL multiple times we should also a case to exercise +// authz for disks filtered by instances +pub static DEMO_DISKS_URL: Lazy = + Lazy::new(|| format!("/v1/disks?{}", *DEMO_PROJECT_SELECTOR)); +pub static DEMO_DISK_URL: Lazy = Lazy::new(|| { + format!("/v1/disks/{}?{}", *DEMO_DISK_NAME, *DEMO_PROJECT_SELECTOR) +}); +pub static DEMO_DISK_CREATE: Lazy = Lazy::new(|| { + params::DiskCreate { 
+ identity: IdentityMetadataCreateParams { + name: DEMO_DISK_NAME.clone(), + description: "".parse().unwrap(), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(4096).unwrap(), + }, + size: ByteCount::from_gibibytes_u32( + // divide by at least two to leave space for snapshot blocks + DiskTest::DEFAULT_ZPOOL_SIZE_GIB / 5, + ), + } +}); +pub static DEMO_DISK_METRICS_URL: Lazy = Lazy::new(|| { + format!( + "/v1/disks/{}/metrics/activated?start_time={:?}&end_time={:?}&{}", + *DEMO_DISK_NAME, + Utc::now(), + Utc::now(), + *DEMO_PROJECT_SELECTOR, + ) +}); + +// Related to importing blocks from an external source +pub static DEMO_IMPORT_DISK_NAME: Lazy = + Lazy::new(|| "demo-import-disk".parse().unwrap()); +pub static DEMO_IMPORT_DISK_CREATE: Lazy = + Lazy::new(|| { params::DiskCreate { identity: IdentityMetadataCreateParams { name: DEMO_IMPORT_DISK_NAME.clone(), @@ -267,381 +305,486 @@ lazy_static! { }, size: ByteCount::from_gibibytes_u32( // divide by at least two to leave space for snapshot blocks - DiskTest::DEFAULT_ZPOOL_SIZE_GIB / 5 + DiskTest::DEFAULT_ZPOOL_SIZE_GIB / 5, ), - }; - - pub static ref DEMO_IMPORT_DISK_BULK_WRITE_START_URL: String = - format!("/v1/disks/{}/bulk-write-start?{}", *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_IMPORT_DISK_BULK_WRITE_URL: String = - format!("/v1/disks/{}/bulk-write?{}", *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_IMPORT_DISK_BULK_WRITE_STOP_URL: String = - format!("/v1/disks/{}/bulk-write-stop?{}", *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_IMPORT_DISK_FINALIZE_URL: String = - format!("/v1/disks/{}/finalize?{}", *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR); -} - -// Separate lazy_static! blocks to avoid hitting some recursion limit when -// compiling -lazy_static! 
{ - // Instance used for testing - pub static ref DEMO_INSTANCE_NAME: Name = "demo-instance".parse().unwrap(); - pub static ref DEMO_INSTANCE_SELECTOR: String = format!("{}&instance={}", *DEMO_PROJECT_SELECTOR, *DEMO_INSTANCE_NAME); - pub static ref DEMO_INSTANCE_URL: String = - format!("/v1/instances/{}?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_START_URL: String = - format!("/v1/instances/{}/start?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_STOP_URL: String = - format!("/v1/instances/{}/stop?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_REBOOT_URL: String = - format!("/v1/instances/{}/reboot?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_MIGRATE_URL: String = - format!("/v1/instances/{}/migrate?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_SERIAL_URL: String = - format!("/v1/instances/{}/serial-console?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_SERIAL_STREAM_URL: String = - format!("/v1/instances/{}/serial-console/stream?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - - pub static ref DEMO_INSTANCE_DISKS_URL: String = - format!("/v1/instances/{}/disks?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_DISKS_ATTACH_URL: String = - format!("/v1/instances/{}/disks/attach?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_DISKS_DETACH_URL: String = - format!("/v1/instances/{}/disks/detach?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - - pub static ref DEMO_INSTANCE_NICS_URL: String = - format!("/v1/network-interfaces?project={}&instance={}", *DEMO_PROJECT_NAME, *DEMO_INSTANCE_NAME); - pub static ref DEMO_INSTANCE_EXTERNAL_IPS_URL: String = - format!("/v1/instances/{}/external-ips?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR); - pub static ref DEMO_INSTANCE_CREATE: params::InstanceCreate = - params::InstanceCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_INSTANCE_NAME.clone(), - description: String::from(""), - }, - ncpus: InstanceCpuCount(1), - memory: ByteCount::from_gibibytes_u32(16), - hostname: String::from("demo-instance"), - user_data: vec![], - network_interfaces: - params::InstanceNetworkInterfaceAttachment::Default, - external_ips: vec![ - params::ExternalIpCreate::Ephemeral { pool_name: Some(DEMO_IP_POOL_NAME.clone()) } - ], - disks: vec![], - start: true, - }; - - // The instance needs a network interface, too. - pub static ref DEMO_INSTANCE_NIC_NAME: Name = - nexus_defaults::DEFAULT_PRIMARY_NIC_NAME.parse().unwrap(); - pub static ref DEMO_INSTANCE_NIC_URL: String = - format!("/v1/network-interfaces/{}?project={}&instance={}", *DEMO_INSTANCE_NIC_NAME, *DEMO_PROJECT_NAME, *DEMO_INSTANCE_NAME); - pub static ref DEMO_INSTANCE_NIC_CREATE: params::InstanceNetworkInterfaceCreate = - params::InstanceNetworkInterfaceCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_INSTANCE_NIC_NAME.clone(), - description: String::from(""), - }, - vpc_name: DEMO_VPC_NAME.clone(), - subnet_name: DEMO_VPC_SUBNET_NAME.clone(), - ip: None, - }; - pub static ref DEMO_INSTANCE_NIC_PUT: params::InstanceNetworkInterfaceUpdate = { - params::InstanceNetworkInterfaceUpdate { - identity: IdentityMetadataUpdateParams { - name: None, - description: Some(String::from("an updated description")), - }, - primary: false, } - }; -} - -lazy_static! 
{ - pub static ref DEMO_CERTIFICATE_NAME: Name = - "demo-certificate".parse().unwrap(); - pub static ref DEMO_CERTIFICATES_URL: String = format!("/v1/certificates"); - pub static ref DEMO_CERTIFICATE_URL: String = - format!("/v1/certificates/demo-certificate"); - pub static ref DEMO_CERTIFICATE: CertificateChain = - CertificateChain::new(format!("*.sys.{DNS_ZONE_EXTERNAL_TESTING}")); - pub static ref DEMO_CERTIFICATE_CREATE: params::CertificateCreate = - params::CertificateCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_CERTIFICATE_NAME.clone(), - description: String::from(""), - }, - cert: DEMO_CERTIFICATE.cert_chain_as_pem(), - key: DEMO_CERTIFICATE.end_cert_private_key_as_pem(), - service: shared::ServiceUsingCertificate::ExternalApi, - }; -} - -lazy_static! { - pub static ref DEMO_SWITCH_PORT_URL: String = - format!("/v1/system/hardware/switch-port"); - - pub static ref DEMO_SWITCH_PORT_SETTINGS_APPLY_URL: String = + }); +pub static DEMO_IMPORT_DISK_BULK_WRITE_START_URL: Lazy = + Lazy::new(|| { format!( - "/v1/system/hardware/switch-port/qsfp7/settings?rack_id={}&switch_location={}", - uuid::Uuid::new_v4(), - "switch0", - ); - - pub static ref DEMO_SWITCH_PORT_SETTINGS: params::SwitchPortApplySettings = - params::SwitchPortApplySettings { - port_settings: NameOrId::Name("portofino".parse().unwrap()), - }; -} - -lazy_static! { - pub static ref DEMO_LOOPBACK_CREATE_URL: String = - "/v1/system/networking/loopback-address".into(); - pub static ref DEMO_LOOPBACK_URL: String = format!( + "/v1/disks/{}/bulk-write-start?{}", + *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR + ) + }); +pub static DEMO_IMPORT_DISK_BULK_WRITE_URL: Lazy = Lazy::new(|| { + format!( + "/v1/disks/{}/bulk-write?{}", + *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_IMPORT_DISK_BULK_WRITE_STOP_URL: Lazy = + Lazy::new(|| { + format!( + "/v1/disks/{}/bulk-write-stop?{}", + *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR + ) + }); +pub static DEMO_IMPORT_DISK_FINALIZE_URL: Lazy = Lazy::new(|| { + format!( + "/v1/disks/{}/finalize?{}", + *DEMO_IMPORT_DISK_NAME, *DEMO_PROJECT_SELECTOR + ) +}); + +// Instance used for testing +pub static DEMO_INSTANCE_NAME: Lazy = + Lazy::new(|| "demo-instance".parse().unwrap()); +pub static DEMO_INSTANCE_URL: Lazy = Lazy::new(|| { + format!("/v1/instances/{}?{}", *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR) +}); +pub static DEMO_INSTANCE_START_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/start?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_STOP_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/stop?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_REBOOT_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/reboot?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_MIGRATE_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/migrate?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_SERIAL_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/serial-console?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_SERIAL_STREAM_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/serial-console/stream?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_DISKS_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/disks?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static 
DEMO_INSTANCE_DISKS_ATTACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/disks/attach?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_DISKS_DETACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/disks/detach?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_NICS_URL: Lazy = Lazy::new(|| { + format!( + "/v1/network-interfaces?project={}&instance={}", + *DEMO_PROJECT_NAME, *DEMO_INSTANCE_NAME + ) +}); +pub static DEMO_INSTANCE_EXTERNAL_IPS_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/external-ips?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_INSTANCE_CREATE: Lazy = + Lazy::new(|| params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_INSTANCE_NAME.clone(), + description: String::from(""), + }, + ncpus: InstanceCpuCount(1), + memory: ByteCount::from_gibibytes_u32(16), + hostname: String::from("demo-instance"), + user_data: vec![], + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![params::ExternalIpCreate::Ephemeral { + pool_name: Some(DEMO_IP_POOL_NAME.clone()), + }], + disks: vec![], + start: true, + }); + +// The instance needs a network interface, too. +pub static DEMO_INSTANCE_NIC_NAME: Lazy = + Lazy::new(|| nexus_defaults::DEFAULT_PRIMARY_NIC_NAME.parse().unwrap()); +pub static DEMO_INSTANCE_NIC_URL: Lazy = Lazy::new(|| { + format!( + "/v1/network-interfaces/{}?project={}&instance={}", + *DEMO_INSTANCE_NIC_NAME, *DEMO_PROJECT_NAME, *DEMO_INSTANCE_NAME + ) +}); +pub static DEMO_INSTANCE_NIC_CREATE: Lazy< + params::InstanceNetworkInterfaceCreate, +> = Lazy::new(|| params::InstanceNetworkInterfaceCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_INSTANCE_NIC_NAME.clone(), + description: String::from(""), + }, + vpc_name: DEMO_VPC_NAME.clone(), + subnet_name: DEMO_VPC_SUBNET_NAME.clone(), + ip: None, +}); +pub static DEMO_INSTANCE_NIC_PUT: Lazy = + Lazy::new(|| params::InstanceNetworkInterfaceUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: Some(String::from("an updated description")), + }, + primary: false, + }); + +pub static DEMO_CERTIFICATE_NAME: Lazy = + Lazy::new(|| "demo-certificate".parse().unwrap()); +pub const DEMO_CERTIFICATES_URL: &'static str = "/v1/certificates"; +pub const DEMO_CERTIFICATE_URL: &'static str = + "/v1/certificates/demo-certificate"; +pub static DEMO_CERTIFICATE: Lazy = Lazy::new(|| { + CertificateChain::new(format!("*.sys.{DNS_ZONE_EXTERNAL_TESTING}")) +}); +pub static DEMO_CERTIFICATE_CREATE: Lazy = + Lazy::new(|| params::CertificateCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_CERTIFICATE_NAME.clone(), + description: String::from(""), + }, + cert: DEMO_CERTIFICATE.cert_chain_as_pem(), + key: DEMO_CERTIFICATE.end_cert_private_key_as_pem(), + service: shared::ServiceUsingCertificate::ExternalApi, + }); + +pub const DEMO_SWITCH_PORT_URL: &'static str = + "/v1/system/hardware/switch-port"; +pub static DEMO_SWITCH_PORT_SETTINGS_APPLY_URL: Lazy = Lazy::new( + || { + format!( + "/v1/system/hardware/switch-port/qsfp7/settings?rack_id={}&switch_location={}", + uuid::Uuid::new_v4(), + "switch0", + ) + }, +); +pub static DEMO_SWITCH_PORT_SETTINGS: Lazy = + Lazy::new(|| params::SwitchPortApplySettings { + port_settings: NameOrId::Name("portofino".parse().unwrap()), + }); + +pub static DEMO_LOOPBACK_CREATE_URL: Lazy = + Lazy::new(|| "/v1/system/networking/loopback-address".into()); +pub static 
DEMO_LOOPBACK_URL: Lazy = Lazy::new(|| { + format!( "/v1/system/networking/loopback-address/{}/{}/{}", uuid::Uuid::new_v4(), "switch0", "203.0.113.99/24", - ); - pub static ref DEMO_LOOPBACK_CREATE: params::LoopbackAddressCreate = - params::LoopbackAddressCreate { - address_lot: NameOrId::Name("parkinglot".parse().unwrap()), - rack_id: uuid::Uuid::new_v4(), - switch_location: "switch0".parse().unwrap(), - address: "203.0.113.99".parse().unwrap(), - mask: 24, - anycast: false, - }; -} - -lazy_static! { - pub static ref DEMO_SWITCH_PORT_SETTINGS_URL: String = format!( - "/v1/system/networking/switch-port-settings?port_settings=portofino" - ); - pub static ref DEMO_SWITCH_PORT_SETTINGS_INFO_URL: String = - format!("/v1/system/networking/switch-port-settings/protofino"); - pub static ref DEMO_SWITCH_PORT_SETTINGS_CREATE: params::SwitchPortSettingsCreate = - params::SwitchPortSettingsCreate::new(IdentityMetadataCreateParams { - name: "portofino".parse().unwrap(), - description: "just a port".into(), - }); -} - -lazy_static! { - pub static ref DEMO_ADDRESS_LOTS_URL: String = - format!("/v1/system/networking/address-lot"); - pub static ref DEMO_ADDRESS_LOT_URL: String = - format!("/v1/system/networking/address-lot/parkinglot"); - pub static ref DEMO_ADDRESS_LOT_BLOCKS_URL: String = - format!("/v1/system/networking/address-lot/parkinglot/blocks"); - pub static ref DEMO_ADDRESS_LOT_CREATE: params::AddressLotCreate = - params::AddressLotCreate { - identity: IdentityMetadataCreateParams { - name: "parkinglot".parse().unwrap(), - description: "an address parking lot".into(), - }, - kind: AddressLotKind::Infra, - blocks: vec![params::AddressLotBlockCreate { - first_address: "203.0.113.10".parse().unwrap(), - last_address: "203.0.113.20".parse().unwrap(), - }], - }; -} - -lazy_static! 
{ - pub static ref DEMO_BGP_CONFIG_CREATE_URL: String = - format!("/v1/system/networking/bgp?name_or_id=as47"); - pub static ref DEMO_BGP_CONFIG: params::BgpConfigCreate = - params::BgpConfigCreate { - identity: IdentityMetadataCreateParams { - name: "as47".parse().unwrap(), - description: "BGP config for AS47".into(), - }, - bgp_announce_set_id: NameOrId::Name("instances".parse().unwrap()), - asn: 47, - vrf: None, - }; - pub static ref DEMO_BGP_ANNOUNCE_SET_URL: String = - format!("/v1/system/networking/bgp-announce?name_or_id=a-bag-of-addrs"); - pub static ref DEMO_BGP_ANNOUNCE: params::BgpAnnounceSetCreate = - params::BgpAnnounceSetCreate { - identity: IdentityMetadataCreateParams { - name: "a-bag-of-addrs".parse().unwrap(), - description: "a bag of addrs".into(), - }, - announcement: vec![params::BgpAnnouncementCreate { - address_lot_block: NameOrId::Name( - "some-block".parse().unwrap(), - ), - network: "10.0.0.0/16".parse().unwrap(), - }], - }; - pub static ref DEMO_BGP_STATUS_URL: String = - format!("/v1/system/networking/bgp-status"); - pub static ref DEMO_BGP_ROUTES_IPV4_URL: String = - format!("/v1/system/networking/bgp-routes-ipv4?asn=47"); -} + ) +}); +pub static DEMO_LOOPBACK_CREATE: Lazy = + Lazy::new(|| params::LoopbackAddressCreate { + address_lot: NameOrId::Name("parkinglot".parse().unwrap()), + rack_id: uuid::Uuid::new_v4(), + switch_location: "switch0".parse().unwrap(), + address: "203.0.113.99".parse().unwrap(), + mask: 24, + anycast: false, + }); + +pub const DEMO_SWITCH_PORT_SETTINGS_URL: &'static str = + "/v1/system/networking/switch-port-settings?port_settings=portofino"; +pub const DEMO_SWITCH_PORT_SETTINGS_INFO_URL: &'static str = + "/v1/system/networking/switch-port-settings/protofino"; +pub static DEMO_SWITCH_PORT_SETTINGS_CREATE: Lazy< + params::SwitchPortSettingsCreate, +> = Lazy::new(|| { + params::SwitchPortSettingsCreate::new(IdentityMetadataCreateParams { + name: "portofino".parse().unwrap(), + description: "just a port".into(), + }) +}); + +pub const DEMO_ADDRESS_LOTS_URL: &'static str = + "/v1/system/networking/address-lot"; +pub const DEMO_ADDRESS_LOT_URL: &'static str = + "/v1/system/networking/address-lot/parkinglot"; +pub const DEMO_ADDRESS_LOT_BLOCKS_URL: &'static str = + "/v1/system/networking/address-lot/parkinglot/blocks"; +pub static DEMO_ADDRESS_LOT_CREATE: Lazy = + Lazy::new(|| params::AddressLotCreate { + identity: IdentityMetadataCreateParams { + name: "parkinglot".parse().unwrap(), + description: "an address parking lot".into(), + }, + kind: AddressLotKind::Infra, + blocks: vec![params::AddressLotBlockCreate { + first_address: "203.0.113.10".parse().unwrap(), + last_address: "203.0.113.20".parse().unwrap(), + }], + }); + +pub const DEMO_BGP_CONFIG_CREATE_URL: &'static str = + "/v1/system/networking/bgp?name_or_id=as47"; +pub static DEMO_BGP_CONFIG: Lazy = + Lazy::new(|| params::BgpConfigCreate { + identity: IdentityMetadataCreateParams { + name: "as47".parse().unwrap(), + description: "BGP config for AS47".into(), + }, + bgp_announce_set_id: NameOrId::Name("instances".parse().unwrap()), + asn: 47, + vrf: None, + }); +pub const DEMO_BGP_ANNOUNCE_SET_URL: &'static str = + "/v1/system/networking/bgp-announce?name_or_id=a-bag-of-addrs"; +pub static DEMO_BGP_ANNOUNCE: Lazy = + Lazy::new(|| params::BgpAnnounceSetCreate { + identity: IdentityMetadataCreateParams { + name: "a-bag-of-addrs".parse().unwrap(), + description: "a bag of addrs".into(), + }, + announcement: vec![params::BgpAnnouncementCreate { + address_lot_block: 
NameOrId::Name("some-block".parse().unwrap()), + network: "10.0.0.0/16".parse().unwrap(), + }], + }); +pub const DEMO_BGP_STATUS_URL: &'static str = + "/v1/system/networking/bgp-status"; +pub const DEMO_BGP_ROUTES_IPV4_URL: &'static str = + "/v1/system/networking/bgp-routes-ipv4?asn=47"; + +// Project Images +pub static DEMO_IMAGE_NAME: Lazy = + Lazy::new(|| "demo-image".parse().unwrap()); +pub static DEMO_PROJECT_IMAGES_URL: Lazy = + Lazy::new(|| format!("/v1/images?project={}", *DEMO_PROJECT_NAME)); +pub static DEMO_PROJECT_IMAGE_URL: Lazy = Lazy::new(|| { + format!("/v1/images/{}?project={}", *DEMO_IMAGE_NAME, *DEMO_PROJECT_NAME) +}); +pub static DEMO_PROJECT_PROMOTE_IMAGE_URL: Lazy = Lazy::new(|| { + format!( + "/v1/images/{}/promote?project={}", + *DEMO_IMAGE_NAME, *DEMO_PROJECT_NAME + ) +}); + +pub static DEMO_SILO_DEMOTE_IMAGE_URL: Lazy = Lazy::new(|| { + format!( + "/v1/images/{}/demote?project={}", + *DEMO_IMAGE_NAME, *DEMO_PROJECT_NAME + ) +}); + +pub static DEMO_IMAGE_CREATE: Lazy = + Lazy::new(|| params::ImageCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_IMAGE_NAME.clone(), + description: String::from(""), + }, + source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + os: "fake-os".to_string(), + version: "1.0".to_string(), + }); + +// IP Pools +pub static DEMO_IP_POOLS_PROJ_URL: Lazy = + Lazy::new(|| format!("/v1/ip-pools?project={}", *DEMO_PROJECT_NAME)); +pub const DEMO_IP_POOLS_URL: &'static str = "/v1/system/ip-pools"; +pub static DEMO_IP_POOL_NAME: Lazy = + Lazy::new(|| "default".parse().unwrap()); +pub static DEMO_IP_POOL_CREATE: Lazy = + Lazy::new(|| params::IpPoolCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_IP_POOL_NAME.clone(), + description: String::from("an IP pool"), + }, + silo: None, + is_default: true, + }); +pub static DEMO_IP_POOL_PROJ_URL: Lazy = Lazy::new(|| { + format!( + "/v1/ip-pools/{}?project={}", + *DEMO_IP_POOL_NAME, *DEMO_PROJECT_NAME + ) +}); +pub static DEMO_IP_POOL_URL: Lazy = + Lazy::new(|| format!("/v1/system/ip-pools/{}", *DEMO_IP_POOL_NAME)); +pub static DEMO_IP_POOL_UPDATE: Lazy = + Lazy::new(|| params::IpPoolUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: Some(String::from("a new IP pool")), + }, + }); +pub static DEMO_IP_POOL_RANGE: Lazy = Lazy::new(|| { + IpRange::V4( + Ipv4Range::new( + std::net::Ipv4Addr::new(10, 0, 0, 0), + std::net::Ipv4Addr::new(10, 0, 0, 255), + ) + .unwrap(), + ) +}); +pub static DEMO_IP_POOL_RANGES_URL: Lazy = + Lazy::new(|| format!("{}/ranges", *DEMO_IP_POOL_URL)); +pub static DEMO_IP_POOL_RANGES_ADD_URL: Lazy = + Lazy::new(|| format!("{}/add", *DEMO_IP_POOL_RANGES_URL)); +pub static DEMO_IP_POOL_RANGES_DEL_URL: Lazy = + Lazy::new(|| format!("{}/remove", *DEMO_IP_POOL_RANGES_URL)); + +// IP Pools (Services) +pub const DEMO_IP_POOL_SERVICE_URL: &'static str = + "/v1/system/ip-pools-service"; +pub static DEMO_IP_POOL_SERVICE_RANGES_URL: Lazy = + Lazy::new(|| format!("{}/ranges", DEMO_IP_POOL_SERVICE_URL)); +pub static DEMO_IP_POOL_SERVICE_RANGES_ADD_URL: Lazy = + Lazy::new(|| format!("{}/add", *DEMO_IP_POOL_SERVICE_RANGES_URL)); +pub static DEMO_IP_POOL_SERVICE_RANGES_DEL_URL: Lazy = + Lazy::new(|| format!("{}/remove", *DEMO_IP_POOL_SERVICE_RANGES_URL)); + +// Snapshots +pub static DEMO_SNAPSHOT_NAME: Lazy = + Lazy::new(|| "demo-snapshot".parse().unwrap()); +pub static DEMO_SNAPSHOT_URL: Lazy = Lazy::new(|| { + format!( + "/v1/snapshots/{}?project={}", + *DEMO_SNAPSHOT_NAME, *DEMO_PROJECT_NAME + ) +}); +pub static 
DEMO_SNAPSHOT_CREATE: Lazy = + Lazy::new(|| params::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_SNAPSHOT_NAME.clone(), + description: String::from(""), + }, + disk: DEMO_DISK_NAME.clone().into(), + }); -lazy_static! { - // Project Images - pub static ref DEMO_IMAGE_NAME: Name = "demo-image".parse().unwrap(); - pub static ref DEMO_PROJECT_IMAGES_URL: String = - format!("/v1/images?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_IMAGE_URL: String = - format!("/v1/images/{}?project={}", *DEMO_IMAGE_NAME, *DEMO_PROJECT_NAME); - pub static ref DEMO_PROJECT_PROMOTE_IMAGE_URL: String = - format!("/v1/images/{}/promote?project={}", *DEMO_IMAGE_NAME, *DEMO_PROJECT_NAME); - pub static ref DEMO_SILO_DEMOTE_IMAGE_URL: String = - format!("/v1/images/{}/demote?project={}", *DEMO_IMAGE_NAME, *DEMO_PROJECT_NAME); - pub static ref DEMO_IMAGE_CREATE: params::ImageCreate = - params::ImageCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_IMAGE_NAME.clone(), - description: String::from(""), - }, - source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, - os: "fake-os".to_string(), - version: "1.0".to_string() - }; - - // IP Pools - pub static ref DEMO_IP_POOLS_PROJ_URL: String = - format!("/v1/ip-pools?project={}", *DEMO_PROJECT_NAME); - pub static ref DEMO_IP_POOLS_URL: &'static str = "/v1/system/ip-pools"; - pub static ref DEMO_IP_POOL_NAME: Name = "default".parse().unwrap(); - pub static ref DEMO_IP_POOL_CREATE: params::IpPoolCreate = - params::IpPoolCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_IP_POOL_NAME.clone(), - description: String::from("an IP pool"), - }, - silo: None, - is_default: true, - }; - pub static ref DEMO_IP_POOL_PROJ_URL: String = - format!("/v1/ip-pools/{}?project={}", *DEMO_IP_POOL_NAME, *DEMO_PROJECT_NAME); - pub static ref DEMO_IP_POOL_URL: String = format!("/v1/system/ip-pools/{}", *DEMO_IP_POOL_NAME); - pub static ref DEMO_IP_POOL_UPDATE: params::IpPoolUpdate = - params::IpPoolUpdate { - identity: IdentityMetadataUpdateParams { - name: None, - description: Some(String::from("a new IP pool")), - }, - }; - pub static ref DEMO_IP_POOL_RANGE: IpRange = IpRange::V4(Ipv4Range::new( - std::net::Ipv4Addr::new(10, 0, 0, 0), - std::net::Ipv4Addr::new(10, 0, 0, 255), - ).unwrap()); - pub static ref DEMO_IP_POOL_RANGES_URL: String = format!("{}/ranges", *DEMO_IP_POOL_URL); - pub static ref DEMO_IP_POOL_RANGES_ADD_URL: String = format!("{}/add", *DEMO_IP_POOL_RANGES_URL); - pub static ref DEMO_IP_POOL_RANGES_DEL_URL: String = format!("{}/remove", *DEMO_IP_POOL_RANGES_URL); - - // IP Pools (Services) - pub static ref DEMO_IP_POOL_SERVICE_URL: &'static str = "/v1/system/ip-pools-service"; - pub static ref DEMO_IP_POOL_SERVICE_RANGES_URL: String = format!("{}/ranges", *DEMO_IP_POOL_SERVICE_URL); - pub static ref DEMO_IP_POOL_SERVICE_RANGES_ADD_URL: String = format!("{}/add", *DEMO_IP_POOL_SERVICE_RANGES_URL); - pub static ref DEMO_IP_POOL_SERVICE_RANGES_DEL_URL: String = format!("{}/remove", *DEMO_IP_POOL_SERVICE_RANGES_URL); - - // Snapshots - pub static ref DEMO_SNAPSHOT_NAME: Name = "demo-snapshot".parse().unwrap(); - pub static ref DEMO_SNAPSHOT_URL: String = - format!("/v1/snapshots/{}?project={}", *DEMO_SNAPSHOT_NAME, *DEMO_PROJECT_NAME); - pub static ref DEMO_SNAPSHOT_CREATE: params::SnapshotCreate = - params::SnapshotCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_SNAPSHOT_NAME.clone(), - description: String::from(""), - }, - disk: DEMO_DISK_NAME.clone().into(), - }; +// SSH keys +pub 
const DEMO_SSHKEYS_URL: &'static str = "/v1/me/ssh-keys"; +pub static DEMO_SSHKEY_NAME: Lazy = + Lazy::new(|| "aaaaa-ssh-key".parse().unwrap()); - // SSH keys - pub static ref DEMO_SSHKEYS_URL: &'static str = "/v1/me/ssh-keys"; - pub static ref DEMO_SSHKEY_NAME: Name = "aaaaa-ssh-key".parse().unwrap(); - pub static ref DEMO_SSHKEY_CREATE: params::SshKeyCreate = params::SshKeyCreate { +pub static DEMO_SSHKEY_CREATE: Lazy = + Lazy::new(|| params::SshKeyCreate { identity: IdentityMetadataCreateParams { name: DEMO_SSHKEY_NAME.clone(), description: "a demo key".to_string(), }, public_key: "AAAAAAAAAAAAAAA".to_string(), - }; + }); - pub static ref DEMO_SPECIFIC_SSHKEY_URL: String = - format!("{}/{}", *DEMO_SSHKEYS_URL, *DEMO_SSHKEY_NAME); +pub static DEMO_SPECIFIC_SSHKEY_URL: Lazy = + Lazy::new(|| format!("{}/{}", DEMO_SSHKEYS_URL, *DEMO_SSHKEY_NAME)); - // System update +// System update - pub static ref DEMO_SYSTEM_UPDATE_PARAMS: params::SystemUpdatePath = params::SystemUpdatePath { - version: SemverVersion::new(1,0,0), - }; -} +pub static DEMO_SYSTEM_UPDATE_PARAMS: Lazy = + Lazy::new(|| params::SystemUpdatePath { + version: SemverVersion::new(1, 0, 0), + }); -lazy_static! { - // Project Floating IPs - pub static ref DEMO_FLOAT_IP_NAME: Name = "float-ip".parse().unwrap(); - pub static ref DEMO_FLOAT_IP_URL: String = - format!("/v1/floating-ips/{}?project={}", *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_NAME); - pub static ref DEMO_FLOAT_IP_CREATE: params::FloatingIpCreate = - params::FloatingIpCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_FLOAT_IP_NAME.clone(), - description: String::from("a new IP pool"), - }, - address: Some(std::net::Ipv4Addr::new(10, 0, 0, 141).into()), - pool: None, - }; -} +// Project Floating IPs +pub static DEMO_FLOAT_IP_NAME: Lazy = + Lazy::new(|| "float-ip".parse().unwrap()); -lazy_static! 
{ - // Identity providers - pub static ref IDENTITY_PROVIDERS_URL: String = format!("/v1/system/identity-providers?silo=demo-silo"); - pub static ref SAML_IDENTITY_PROVIDERS_URL: String = format!("/v1/system/identity-providers/saml?silo=demo-silo"); +pub static DEMO_FLOAT_IP_URL: Lazy = Lazy::new(|| { + format!( + "/v1/floating-ips/{}?project={}", + *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_NAME + ) +}); - pub static ref DEMO_SAML_IDENTITY_PROVIDER_NAME: Name = "demo-saml-provider".parse().unwrap(); - pub static ref SPECIFIC_SAML_IDENTITY_PROVIDER_URL: String = format!("/v1/system/identity-providers/saml/{}?silo=demo-silo", *DEMO_SAML_IDENTITY_PROVIDER_NAME); +pub static DEMO_FLOAT_IP_CREATE: Lazy = + Lazy::new(|| params::FloatingIpCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_FLOAT_IP_NAME.clone(), + description: String::from("a new IP pool"), + }, + address: Some(std::net::Ipv4Addr::new(10, 0, 0, 141).into()), + pool: None, + }); + +// Identity providers +pub const IDENTITY_PROVIDERS_URL: &'static str = + "/v1/system/identity-providers?silo=demo-silo"; +pub const SAML_IDENTITY_PROVIDERS_URL: &'static str = + "/v1/system/identity-providers/saml?silo=demo-silo"; +pub static DEMO_SAML_IDENTITY_PROVIDER_NAME: Lazy = + Lazy::new(|| "demo-saml-provider".parse().unwrap()); + +pub static SPECIFIC_SAML_IDENTITY_PROVIDER_URL: Lazy = + Lazy::new(|| { + format!( + "/v1/system/identity-providers/saml/{}?silo=demo-silo", + *DEMO_SAML_IDENTITY_PROVIDER_NAME + ) + }); - pub static ref SAML_IDENTITY_PROVIDER: params::SamlIdentityProviderCreate = - params::SamlIdentityProviderCreate { - identity: IdentityMetadataCreateParams { - name: DEMO_SAML_IDENTITY_PROVIDER_NAME.clone(), - description: "a demo provider".to_string(), - }, +pub static SAML_IDENTITY_PROVIDER: Lazy = + Lazy::new(|| params::SamlIdentityProviderCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_SAML_IDENTITY_PROVIDER_NAME.clone(), + description: "a demo provider".to_string(), + }, - idp_metadata_source: params::IdpMetadataSource::Url { url: HTTP_SERVER.url("/descriptor").to_string() }, + idp_metadata_source: params::IdpMetadataSource::Url { + url: HTTP_SERVER.url("/descriptor").to_string(), + }, - idp_entity_id: "entity_id".to_string(), - sp_client_id: "client_id".to_string(), - acs_url: "http://acs".to_string(), - slo_url: "http://slo".to_string(), - technical_contact_email: "technical@fake".to_string(), + idp_entity_id: "entity_id".to_string(), + sp_client_id: "client_id".to_string(), + acs_url: "http://acs".to_string(), + slo_url: "http://slo".to_string(), + technical_contact_email: "technical@fake".to_string(), - signing_keypair: None, + signing_keypair: None, - group_attribute_name: None, - }; + group_attribute_name: None, + }); - pub static ref DEMO_SYSTEM_METRICS_URL: String = - format!( - "/v1/system/metrics/virtual_disk_space_provisioned?start_time={:?}&end_time={:?}", - Utc::now(), - Utc::now(), - ); +pub static DEMO_SYSTEM_METRICS_URL: Lazy = Lazy::new(|| { + format!( + "/v1/system/metrics/virtual_disk_space_provisioned?start_time={:?}&end_time={:?}", + Utc::now(), + Utc::now(), + ) +}); - pub static ref DEMO_SILO_METRICS_URL: String = - format!( - "/v1/metrics/virtual_disk_space_provisioned?start_time={:?}&end_time={:?}", - Utc::now(), - Utc::now(), - ); +pub static DEMO_SILO_METRICS_URL: Lazy = Lazy::new(|| { + format!( + "/v1/metrics/virtual_disk_space_provisioned?start_time={:?}&end_time={:?}", + Utc::now(), + Utc::now(), + ) +}); - // Users - pub static ref DEMO_USER_CREATE: params::UserCreate = 
params::UserCreate { +// Users +pub static DEMO_USER_CREATE: Lazy = + Lazy::new(|| params::UserCreate { external_id: params::UserId::from_str("dummy-user").unwrap(), password: params::UserPassword::LoginDisallowed, - }; -} + }); /// Describes an API endpoint to be verified by the "unauthorized" test /// @@ -779,12 +922,13 @@ impl AllowedMethod { } } -lazy_static! { - pub static ref URL_USERS_DB_INIT: String = - format!("/v1/system/users-builtin/{}", authn::USER_DB_INIT.name); +pub static URL_USERS_DB_INIT: Lazy = Lazy::new(|| { + format!("/v1/system/users-builtin/{}", authn::USER_DB_INIT.name) +}); - /// List of endpoints to be verified - pub static ref VERIFY_ENDPOINTS: Vec = vec![ +/// List of endpoints to be verified +pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { + vec![ // Global IAM policy VerifyEndpoint { url: &SYSTEM_POLICY_URL, @@ -801,7 +945,6 @@ lazy_static! { ), ], }, - // IP Pools top-level endpoint VerifyEndpoint { url: &DEMO_IP_POOLS_URL, @@ -1558,7 +1701,6 @@ lazy_static! { AllowedMethod::GetWebsocket ], }, - /* Instance NICs */ VerifyEndpoint { url: &DEMO_INSTANCE_NICS_URL, @@ -1571,7 +1713,6 @@ lazy_static! { ), ], }, - VerifyEndpoint { url: &DEMO_INSTANCE_NIC_URL, visibility: Visibility::Protected, @@ -1839,7 +1980,6 @@ lazy_static! { unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![AllowedMethod::Get], }, - /* Misc */ VerifyEndpoint { @@ -2066,6 +2206,6 @@ lazy_static! { AllowedMethod::Get, AllowedMethod::Delete, ], - } - ]; -} + }, + ] +}); diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index 1cb2eaca3a..317a5a0576 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -13,7 +13,6 @@ use headers::authorization::Credentials; use http::method::Method; use http::StatusCode; use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; -use lazy_static::lazy_static; use nexus_db_queries::authn::external::spoof; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; @@ -21,6 +20,7 @@ use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::http_testing::TestResponse; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; +use once_cell::sync::Lazy; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -158,8 +158,8 @@ enum SetupReq { }, } -lazy_static! { - pub static ref HTTP_SERVER: httptest::Server = { +pub static HTTP_SERVER: Lazy = + Lazy::new(|| { // Run a httptest server let server = ServerBuilder::new().run().unwrap(); @@ -167,12 +167,10 @@ lazy_static! { server.expect( Expectation::matching(request::method_path("HEAD", "/image.raw")) .times(1..) - .respond_with( - status_code(200).append_header( - "Content-Length", - format!("{}", 4096 * 1000), - ), - ), + .respond_with(status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + )), ); server.expect( @@ -182,10 +180,11 @@ lazy_static! { ); server - }; + }); - /// List of requests to execute at setup time - static ref SETUP_REQUESTS: Vec = vec![ +/// List of requests to execute at setup time +static SETUP_REQUESTS: Lazy> = Lazy::new(|| { + vec![ // Create a separate Silo SetupReq::Post { url: "/v1/system/silos", @@ -203,10 +202,7 @@ lazy_static! 
{ ], }, // Get the default IP pool - SetupReq::Get { - url: &DEMO_IP_POOL_URL, - id_routes: vec![], - }, + SetupReq::Get { url: &DEMO_IP_POOL_URL, id_routes: vec![] }, // Create an IP pool range SetupReq::Post { url: &DEMO_IP_POOL_RANGES_ADD_URL, @@ -302,8 +298,8 @@ lazy_static! { body: serde_json::to_value(&*DEMO_CERTIFICATE_CREATE).unwrap(), id_routes: vec![], }, - ]; -} + ] +}); /// Contents returned from an endpoint that creates a resource that has an id /// From de152304c3f551914db05bda1ddbc4b139e1f766 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 20 Dec 2023 14:20:03 -0800 Subject: [PATCH 119/186] Bump SP versions to 1.0.4 (#4722) --- tools/hubris_checksums | 14 +++++++------- tools/hubris_version | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index 1396af4d60..b63deb9687 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,7 +1,7 @@ -2df01d7dd17423588c99de4361694efdb6bd375e2f54db053320eeead3e07eda build-gimlet-c-image-default-v1.0.3.zip -8ac0eb6d7817825c6318feb8327f5758a33ccd2479512e3e2424f0eb8e290010 build-gimlet-d-image-default-v1.0.3.zip -eeeb72ec81a843fa1f5093096d1e4500aba6ce01c2d21040a2a10a092595d945 build-gimlet-e-image-default-v1.0.3.zip -de0d9028929322f6d5afc4cb52c198b3402c93a38aa15f9d378617ca1d1112c9 build-psc-b-image-default-v1.0.3.zip -11a6235d852bd75548f12d85b0913cb4ccb0aff3c38bf8a92510a2b9c14dad3c build-psc-c-image-default-v1.0.3.zip -3f863d46a462432f19d3fb5a293b8106da6e138de80271f869692bd29abd994b build-sidecar-b-image-default-v1.0.3.zip -2a9feac7f2da61b843d00edf2693c31c118f202c6cd889d1d1758ea1dd95dbca build-sidecar-c-image-default-v1.0.3.zip +a1a3abb29fb78330c682f8b4f58397f28e296463ac18659af82f762a714f3759 build-gimlet-c-image-default-v1.0.4.zip +f53bc6b8fa825fa1f49b5401b05a14bbd22516c16a8254ef5cd5f3b26b450098 build-gimlet-d-image-default-v1.0.4.zip +a91a1719a03531fdc62608a3b747962b3b7a6dc093ae3810ff35a353ef1e9bf7 build-gimlet-e-image-default-v1.0.4.zip +08ce2931d17d58cde8af49d99de425af4b15384923d2cf79d58000fd2ac5d88c build-psc-b-image-default-v1.0.4.zip +71167b0c889132c3584ba05ee1f7e5917092cd6d7fe8f50f04cdbf3f78321fdf build-psc-c-image-default-v1.0.4.zip +56a02e8620a8343282ee4f205dabcb4898a3acb0e50b6e6eca3919a33a159ee4 build-sidecar-b-image-default-v1.0.4.zip +54eb8d9e202cd69a8cdbdd505276c8c2c1d7f548e2b4234c01887209b190bc91 build-sidecar-c-image-default-v1.0.4.zip diff --git a/tools/hubris_version b/tools/hubris_version index b00c3286fe..0cce8d745a 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.3 psc-v1.0.3 sidecar-v1.0.3) +TAGS=(gimlet-v1.0.4 psc-v1.0.4 sidecar-v1.0.4) From b114324f809a73a62d8cfb3e95ee1cd42e0794fc Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 20 Dec 2023 19:24:59 -0800 Subject: [PATCH 120/186] Correct hubris versions (#4723) The previous versions were tagged incorrectly in the caboose --- tools/hubris_checksums | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index b63deb9687..707c67fe0c 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,7 +1,7 @@ -a1a3abb29fb78330c682f8b4f58397f28e296463ac18659af82f762a714f3759 build-gimlet-c-image-default-v1.0.4.zip -f53bc6b8fa825fa1f49b5401b05a14bbd22516c16a8254ef5cd5f3b26b450098 build-gimlet-d-image-default-v1.0.4.zip -a91a1719a03531fdc62608a3b747962b3b7a6dc093ae3810ff35a353ef1e9bf7 build-gimlet-e-image-default-v1.0.4.zip 
-08ce2931d17d58cde8af49d99de425af4b15384923d2cf79d58000fd2ac5d88c build-psc-b-image-default-v1.0.4.zip
-71167b0c889132c3584ba05ee1f7e5917092cd6d7fe8f50f04cdbf3f78321fdf build-psc-c-image-default-v1.0.4.zip
-56a02e8620a8343282ee4f205dabcb4898a3acb0e50b6e6eca3919a33a159ee4 build-sidecar-b-image-default-v1.0.4.zip
-54eb8d9e202cd69a8cdbdd505276c8c2c1d7f548e2b4234c01887209b190bc91 build-sidecar-c-image-default-v1.0.4.zip
+09f0342eed777495ac0a852f219d2dec45fdc1b860f938f95736851b1627cad7 build-gimlet-c-image-default-v1.0.4.zip
+aef9279ba6d1d0ffa64586d71cdf5933eddbe048ce1a10f5f611128a84b53642 build-gimlet-d-image-default-v1.0.4.zip
+989f89f0060239b77d92fe068ceae1be406591c997224256c617d77b2ccbf1b0 build-gimlet-e-image-default-v1.0.4.zip
+8e41a139bc62ff86b8343989889491739bb90eb46e1a02585252adf3ee540db9 build-psc-b-image-default-v1.0.4.zip
+76e35e71714921a1ca5f7f8314fc596e3b5fe1dfd422c59fdc9a62c1ebfeec0e build-psc-c-image-default-v1.0.4.zip
+a406045b1d545fd063bb989c84a774e4d09a445618d4a8889ce232a3b45884a7 build-sidecar-b-image-default-v1.0.4.zip
+69ba3ac372388058f8a6e58230e7e2964990609f18c0960357d17bfc16f25bae build-sidecar-c-image-default-v1.0.4.zip

From ae7c2ed727f04de778707af9efea55db9e4d1f5a Mon Sep 17 00:00:00 2001
From: Rain
Date: Thu, 21 Dec 2023 15:08:29 -0800
Subject: [PATCH 121/186] [rust] update to Rust 1.74.1 (#4700)

I'm hitting an ICE with 1.74.0 that appears to be fixed with 1.74.1.

---
 rust-toolchain.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 65ee8a9912..2e7a87b58b 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -4,5 +4,5 @@
 #
 # We choose a specific toolchain (rather than "stable") for repeatability. The
 # intent is to keep this up-to-date with recently-released stable Rust.
-channel = "1.74.0"
+channel = "1.74.1"
 profile = "default"

From b19c61a7137a909c58d8ae45fd81e266f9f98f73 Mon Sep 17 00:00:00 2001
From: Rain
Date: Thu, 21 Dec 2023 15:09:06 -0800
Subject: [PATCH 122/186] [authz-macros] accept an optional input_key argument (#4707)

In some cases, including resources with composite keys, it can be better to
make the outside representation of the primary key a dedicated struct rather
than passing around tuples of various types. Enable that in the
`authz_resource` macro by allowing users to specify an optional `input_key`
argument, which represents a better view into the primary key.

I'm not entirely sure that the `From` trait is the right thing to use here,
but it seems like a pretty low-cost decision to change in the future.
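
For concreteness, this is the shape of the new usage (taken from the doc
example this patch adds to the macro; `SomeCompositeId` and `MyResource` are
illustrative names that are never compiled):

```
// A struct-valued key for a resource whose primary key is a tuple.
struct SomeCompositeId {
    foo: String,
    bar: String,
}

// The macro relies on a `From` impl from the input key to the primary key.
impl From<SomeCompositeId> for (String, String) {
    fn from(id: SomeCompositeId) -> Self {
        (id.foo, id.bar)
    }
}

authz_resource! {
    name = "MyResource",
    parent = "Fleet",
    primary_key = (String, String),
    input_key = SomeCompositeId,
    roles_allowed = false,
    polar_snippet = FleetChild,
}
```

With this in place, the generated `MyResource::new` accepts a
`SomeCompositeId` directly, while the new `with_primary_key` constructor
still takes the raw tuple for callers that already have one.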
As part of this PR I also switched to the prettyplease crate, which as the README explains is more suitable for generated code than rustfmt: https://crates.io/crates/prettyplease --- Cargo.lock | 7 +-- Cargo.toml | 1 + nexus/authz-macros/Cargo.toml | 3 ++ nexus/authz-macros/src/lib.rs | 96 ++++++++++++++++++++++++++++++----- nexus/db-macros/Cargo.toml | 2 +- nexus/db-macros/src/lookup.rs | 15 ++++-- 6 files changed, 103 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 962fe68e02..98275e144f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -354,6 +354,7 @@ version = "0.1.0" dependencies = [ "heck 0.4.1", "omicron-workspace-hack", + "prettyplease", "proc-macro2", "quote", "serde", @@ -1539,9 +1540,9 @@ version = "0.1.0" dependencies = [ "heck 0.4.1", "omicron-workspace-hack", + "prettyplease", "proc-macro2", "quote", - "rustfmt-wrapper", "serde", "serde_tokenstream 0.2.0", "syn 2.0.32", @@ -6147,9 +6148,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.12" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", "syn 2.0.32", diff --git a/Cargo.toml b/Cargo.toml index d651a13bf1..d4f81b0310 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -289,6 +289,7 @@ postgres-protocol = "0.6.6" predicates = "3.0.4" pretty_assertions = "1.4.0" pretty-hex = "0.4.0" +prettyplease = "0.2.15" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } diff --git a/nexus/authz-macros/Cargo.toml b/nexus/authz-macros/Cargo.toml index 15f18cb9c8..816100eb58 100644 --- a/nexus/authz-macros/Cargo.toml +++ b/nexus/authz-macros/Cargo.toml @@ -15,3 +15,6 @@ serde.workspace = true serde_tokenstream.workspace = true syn.workspace = true omicron-workspace-hack.workspace = true + +[dev-dependencies] +prettyplease.workspace = true diff --git a/nexus/authz-macros/src/lib.rs b/nexus/authz-macros/src/lib.rs index d85516f3ea..3d6f265fea 100644 --- a/nexus/authz-macros/src/lib.rs +++ b/nexus/authz-macros/src/lib.rs @@ -95,6 +95,33 @@ use serde_tokenstream::ParseWrapper; /// polar_snippet = FleetChild, /// } /// ``` +/// +/// In some cases, it may be more convenient to identify a composite key with a +/// struct rather than relying on tuples. This is supported too: +/// +/// ```ignore +/// struct SomeCompositeId { +/// foo: String, +/// bar: String, +/// } +/// +/// // There needs to be a `From` impl from the composite ID to the primary key. +/// impl From for (String, String) { +/// fn from(id: SomeCompositeId) -> Self { +/// (id.foo, id.bar) +/// } +/// } +/// +/// authz_resource! { +/// name = "MyResource", +/// parent = "Fleet", +/// primary_key = (String, String), +/// input_key = SomeCompositeId, +/// roles_allowed = false, +/// polar_snippet = FleetChild, +/// } +/// ``` + // Allow private intra-doc links. This is useful because the `Input` struct // cannot be exported (since we're a proc macro crate, and we can't expose // a struct), but its documentation is very useful. @@ -121,6 +148,12 @@ struct Input { parent: String, /// Rust type for the primary key for this resource primary_key: ParseWrapper, + /// Rust type for the input key for this resource (the key users specify + /// for this resource, convertible to `primary_key`). 
+ /// + /// This is the same as primary_key if not specified. + #[serde(default)] + input_key: Option>, /// Whether roles may be attached directly to this resource roles_allowed: bool, /// How to generate the Polar snippet for this resource @@ -153,6 +186,9 @@ fn do_authz_resource( let parent_resource_name = format_ident!("{}", input.parent); let parent_as_snake = heck::AsSnakeCase(&input.parent).to_string(); let primary_key_type = &*input.primary_key; + let input_key_type = + &**input.input_key.as_ref().unwrap_or(&input.primary_key); + let (has_role_body, as_roles_body, api_resource_roles_trait) = if input.roles_allowed { ( @@ -334,6 +370,21 @@ fn do_authz_resource( /// `parent`, unique key `key`, looked up as described by /// `lookup_type` pub fn new( + parent: #parent_resource_name, + key: #input_key_type, + lookup_type: LookupType, + ) -> #resource_name { + #resource_name { + parent, + key: key.into(), + lookup_type, + } + } + + /// A version of `new` that takes the primary key type directly. + /// This is only different from [`Self::new`] if this resource + /// uses a different input key type. + pub fn with_primary_key( parent: #parent_resource_name, key: #primary_key_type, lookup_type: LookupType, @@ -346,7 +397,7 @@ fn do_authz_resource( } pub fn id(&self) -> #primary_key_type { - self.key.clone() + self.key.clone().into() } /// Describes how to register this type with Oso @@ -411,15 +462,36 @@ fn do_authz_resource( // See the test for lookup_resource. #[cfg(test)] -#[test] -fn test_authz_dump() { - let output = do_authz_resource(quote! { - name = "Organization", - parent = "Fleet", - primary_key = Uuid, - roles_allowed = false, - polar_snippet = Custom, - }) - .unwrap(); - println!("{}", output); +mod tests { + use super::*; + #[test] + fn test_authz_dump() { + let output = do_authz_resource(quote! { + name = "Organization", + parent = "Fleet", + primary_key = Uuid, + roles_allowed = false, + polar_snippet = Custom, + }) + .unwrap(); + println!("{}", pretty_format(output)); + + let output = do_authz_resource(quote! { + name = "Instance", + parent = "Project", + primary_key = (String, String), + // The SomeCompositeId type doesn't exist, but that's okay because + // this code is never compiled, just printed out. 
+ input_key = SomeCompositeId, + roles_allowed = false, + polar_snippet = InProject, + }) + .unwrap(); + println!("{}", pretty_format(output)); + } + + fn pretty_format(input: TokenStream) -> String { + let parsed = syn::parse2(input).unwrap(); + prettyplease::unparse(&parsed) + } } diff --git a/nexus/db-macros/Cargo.toml b/nexus/db-macros/Cargo.toml index 053c381ac9..64398b266c 100644 --- a/nexus/db-macros/Cargo.toml +++ b/nexus/db-macros/Cargo.toml @@ -18,4 +18,4 @@ syn = { workspace = true, features = ["extra-traits"] } omicron-workspace-hack.workspace = true [dev-dependencies] -rustfmt-wrapper.workspace = true +prettyplease.workspace = true diff --git a/nexus/db-macros/src/lookup.rs b/nexus/db-macros/src/lookup.rs index f2362f5bc5..c7906c7bf0 100644 --- a/nexus/db-macros/src/lookup.rs +++ b/nexus/db-macros/src/lookup.rs @@ -406,7 +406,7 @@ fn generate_misc_helpers(config: &Config) -> TokenStream { db_row: &nexus_db_model::#resource_name, lookup_type: LookupType, ) -> authz::#resource_name { - authz::#resource_name::new( + authz::#resource_name::with_primary_key( authz_parent.clone(), db_row.id(), lookup_type @@ -923,8 +923,8 @@ fn generate_database_functions(config: &Config) -> TokenStream { #[cfg(test)] mod test { use super::lookup_resource; + use proc_macro2::TokenStream; use quote::quote; - use rustfmt_wrapper::rustfmt; #[test] #[ignore] @@ -938,7 +938,7 @@ mod test { primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] }) .unwrap(); - println!("{}", rustfmt(output).unwrap()); + println!("{}", pretty_format(output)); let output = lookup_resource(quote! { name = "SiloUser", @@ -949,7 +949,7 @@ mod test { primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] }) .unwrap(); - println!("{}", rustfmt(output).unwrap()); + println!("{}", pretty_format(output)); let output = lookup_resource(quote! { name = "UpdateArtifact", @@ -964,6 +964,11 @@ mod test { ] }) .unwrap(); - println!("{}", rustfmt(output).unwrap()); + println!("{}", pretty_format(output)); + } + + fn pretty_format(input: TokenStream) -> String { + let parsed = syn::parse2(input).unwrap(); + prettyplease::unparse(&parsed) } } From b16be3f68c420e223dccb0326deb327ae5be8e50 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 22 Dec 2023 08:13:03 +0000 Subject: [PATCH 123/186] Update Sidecar-lite, ignore ARP hostnames in SoftNPU init (#4725) Bumps to the most recent sidecar-lite, which includes a minor fix for manually added routes, and adds the `-n` flag when checking `arp` for the gateway's MAC address during SoftNPU init. This is really only needed if the gateway is also acting as a DNS server and names itself something cutesy, but it can come up: ``` kyle@farme:~/gits/omicron$ arp -a Net to Media Table: IPv4 Device IP Address Mask Flags Phys Addr ------ -------------------- --------------- -------- --------------- rge0 hub.home.arpa 255.255.255.255 b8:6a:f1:28:cd:00 ... 
kyle@farme:~/gits/omicron$ arp -an
Net to Media Table: IPv4
Device IP Address Mask Flags Phys Addr
------ -------------------- --------------- -------- ---------------
rge0 10.0.0.1 255.255.255.255 b8:6a:f1:28:cd:00
```

---
 tools/create_virtual_hardware.sh | 2 +-
 tools/scrimlet/softnpu-init.sh   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh
index 7721fb1c0f..fa35bb24ab 100755
--- a/tools/create_virtual_hardware.sh
+++ b/tools/create_virtual_hardware.sh
@@ -63,7 +63,7 @@ function ensure_softnpu_zone {
         --omicron-zone \
         --ports sc0_0,tfportrear0_0 \
         --ports sc0_1,tfportqsfp0_0 \
-        --sidecar-lite-commit 45ed98fea5824feb4d42f45bbf218e597dc9fc58 \
+        --sidecar-lite-commit e3ea4b495ba0a71801ded0776ae4bbd31df57e26 \
         --softnpu-commit dbab082dfa89da5db5ca2325c257089d2f130092
     }
     "$SOURCE_DIR"/scrimlet/softnpu-init.sh

diff --git a/tools/scrimlet/softnpu-init.sh b/tools/scrimlet/softnpu-init.sh
index 6a2a9e10ce..59f8e83019 100755
--- a/tools/scrimlet/softnpu-init.sh
+++ b/tools/scrimlet/softnpu-init.sh
@@ -31,7 +31,7 @@ fi
 # Add an extrac space at the end of the search pattern passed to `grep`, so that
 # we can be sure we're matching the exact $GATEWAY_IP, and not something that
 # shares the same string prefix.
-GATEWAY_MAC=${GATEWAY_MAC:=$(arp -a | grep "$GATEWAY_IP " | awk -F ' ' '{print $NF}')}
+GATEWAY_MAC=${GATEWAY_MAC:=$(arp -an | grep "$GATEWAY_IP " | awk -F ' ' '{print $NF}')}

 # Check that the MAC appears to be exactly one MAC address.
 COUNT=$(grep -c -E '^([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$' <(echo "$GATEWAY_MAC"))

From 727bc961139e77f9e33d69bb6ebe8fce31bb117d Mon Sep 17 00:00:00 2001
From: Andy Fiddaman
Date: Fri, 22 Dec 2023 13:54:54 +0000
Subject: [PATCH 124/186] Disable the SSH daemon in most non-global zones (#4716)

This disables the SSH daemon in self-assembling zones via the SMF profile,
and directly in non-self-assembling zones. The service remains enabled in
the switch zone for wicket and support.
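
As a sketch, the profile for a self-assembling zone now carries an explicitly
disabled `network/ssh` instance, built with the helpers this patch touches
(setup and the zone's other services are elided here):

```
// Instances are enabled by default; `disable()` opts this one out, which
// renders as enabled="false" in the generated SMF profile XML.
let disabled_ssh_service = ServiceBuilder::new("network/ssh")
    .add_instance(ServiceInstanceBuilder::new("default").disable());

let profile = ProfileBuilder::new("omicron")
    .add_service(disabled_ssh_service);
```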
--- sled-agent/src/profile.rs | 31 +++++++++++++++++++++++++++--- sled-agent/src/services.rs | 39 ++++++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/sled-agent/src/profile.rs b/sled-agent/src/profile.rs index 7c2d8d1738..1addbca4c9 100644 --- a/sled-agent/src/profile.rs +++ b/sled-agent/src/profile.rs @@ -116,12 +116,18 @@ impl Display for ServiceBuilder { pub struct ServiceInstanceBuilder { name: String, + enabled: bool, property_groups: Vec, } impl ServiceInstanceBuilder { pub fn new(name: &str) -> Self { - Self { name: name.to_string(), property_groups: vec![] } + Self { name: name.to_string(), enabled: true, property_groups: vec![] } + } + + pub fn disable(mut self) -> Self { + self.enabled = false; + self } pub fn add_property_group( @@ -137,9 +143,10 @@ impl Display for ServiceInstanceBuilder { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { write!( f, - r#" + r#" "#, - name = self.name + name = self.name, + enabled = self.enabled )?; for property_group in &self.property_groups { @@ -315,6 +322,24 @@ mod tests { ); } + #[test] + fn test_disabled_instance() { + let builder = ProfileBuilder::new("myprofile") + .add_service(ServiceBuilder::new("myservice").add_instance( + ServiceInstanceBuilder::new("default").disable(), + )); + assert_eq!( + format!("{}", builder), + r#" + + + + + +"#, + ); + } + #[test] fn test_property_group() { let builder = ProfileBuilder::new("myprofile").add_service( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 837c2a05df..a9000a1c4b 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1371,8 +1371,8 @@ impl ServiceManager { .add_property_group(dns_config_builder) // We do need to enable the default instance of the // dns/install service. It's enough to just mention it - // here, as the ServiceInstanceBuilder always enables the - // instance being added. + // here, as the ServiceInstanceBuilder enables the + // instance being added by default. .add_instance(ServiceInstanceBuilder::new("default"))) } @@ -1473,6 +1473,8 @@ impl ServiceManager { // // These zones are self-assembling -- after they boot, there should // be no "zlogin" necessary to initialize. 
+ let disabled_ssh_service = ServiceBuilder::new("network/ssh") + .add_instance(ServiceInstanceBuilder::new("default").disable()); match &request { ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -1507,6 +1509,7 @@ impl ServiceManager { ); let profile = ProfileBuilder::new("omicron") + .add_service(disabled_ssh_service) .add_service(clickhouse_service) .add_service(dns_service); profile @@ -1551,6 +1554,7 @@ impl ServiceManager { .add_property_group(config), ); let profile = ProfileBuilder::new("omicron") + .add_service(disabled_ssh_service) .add_service(clickhouse_keeper_service) .add_service(dns_service); profile @@ -1603,6 +1607,7 @@ impl ServiceManager { ); let profile = ProfileBuilder::new("omicron") + .add_service(disabled_ssh_service) .add_service(cockroachdb_service) .add_service(dns_service); profile @@ -1646,12 +1651,15 @@ impl ServiceManager { .add_property("uuid", "astring", uuid) .add_property("store", "astring", "/data"); - let profile = ProfileBuilder::new("omicron").add_service( - ServiceBuilder::new("oxide/crucible/agent").add_instance( - ServiceInstanceBuilder::new("default") - .add_property_group(config), - ), - ); + let profile = ProfileBuilder::new("omicron") + .add_service(disabled_ssh_service) + .add_service( + ServiceBuilder::new("oxide/crucible/agent") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ), + ); profile .add_to_zone(&self.inner.log, &installed_zone) .await @@ -1685,12 +1693,15 @@ impl ServiceManager { .add_property("listen_addr", "astring", listen_addr) .add_property("listen_port", "astring", listen_port); - let profile = ProfileBuilder::new("omicron").add_service( - ServiceBuilder::new("oxide/crucible/pantry").add_instance( - ServiceInstanceBuilder::new("default") - .add_property_group(config), - ), - ); + let profile = ProfileBuilder::new("omicron") + .add_service(disabled_ssh_service) + .add_service( + ServiceBuilder::new("oxide/crucible/pantry") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ), + ); profile .add_to_zone(&self.inner.log, &installed_zone) .await From a2cef18d7b735580bc8963103b2f0e4e30fd9885 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 22 Dec 2023 10:50:38 -0800 Subject: [PATCH 125/186] Update Rust crate russh to v0.40.2 [SECURITY] (#4714) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 98275e144f..3cdf3dd678 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6908,9 +6908,9 @@ dependencies = [ [[package]] name = "russh" -version = "0.40.1" +version = "0.40.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23955cec4c4186e8c36f42c5d4043f9fd6cab8702fd08ce1971d966b48ec832f" +checksum = "93dab9e1c313d0d04a42e39c0995943fc38c037e2e3fa9c33685777a1aecdfb2" dependencies = [ "aes", "aes-gcm", From 709493b9adcec358fbdf9c3f5025698c589bd1df Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Fri, 22 Dec 2023 17:59:42 -0500 Subject: [PATCH 126/186] Rename API to sled_add and take UninitializedSledId as param (#4704) This is the second half of the fix for #4607 --- .../db-queries/src/db/datastore/inventory.rs | 4 +- nexus/src/app/rack.rs | 84 ++++++++++++------- nexus/src/external_api/http_entrypoints.rs | 10 +-- nexus/tests/integration_tests/endpoints.rs | 16 +--- nexus/tests/output/nexus_tags.txt | 2 +- nexus/types/src/external_api/params.rs | 17 ++++ nexus/types/src/inventory.rs | 7 ++ openapi/nexus.json | 20 ++++- openapi/sled-agent.json | 2 +- sled-agent/src/http_entrypoints.rs | 6 +- sled-agent/src/sled_agent.rs | 2 +- 11 files changed, 115 insertions(+), 55 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 31b24a7e75..7d880b4ec0 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -919,7 +919,7 @@ impl DataStore { pub async fn find_hw_baseboard_id( &self, opctx: &OpContext, - baseboard_id: BaseboardId, + baseboard_id: &BaseboardId, ) -> Result { opctx.authorize(authz::Action::Read, &authz::INVENTORY).await?; let conn = self.pool_connection_authorized(opctx).await?; @@ -1442,7 +1442,7 @@ mod test { part_number: "some-part".into(), }; let err = datastore - .find_hw_baseboard_id(&opctx, baseboard_id) + .find_hw_baseboard_id(&opctx, &baseboard_id) .await .unwrap_err(); assert!(matches!(err, Error::ObjectNotFound { .. })); diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index c0307e5b5b..a0dcb7fcb1 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -31,6 +31,7 @@ use nexus_types::external_api::params::LinkConfig; use nexus_types::external_api::params::LldpServiceConfig; use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfig; +use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::params::{ AddressLotCreate, BgpPeerConfig, LoopbackAddressCreate, Route, SiloCreate, SwitchPortSettingsCreate, @@ -51,6 +52,7 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; +use omicron_common::api::external::ResourceType; use omicron_common::api::internal::shared::ExternalPortDiscovery; use sled_agent_client::types::AddSledRequest; use sled_agent_client::types::EarlyNetworkConfigBody; @@ -69,6 +71,19 @@ use std::num::NonZeroU32; use std::str::FromStr; use uuid::Uuid; +// A limit for querying the last inventory collection +// +// We set a limit of 200 here to give us some breathing room when +// querying for cabooses and RoT pages, each of which is "4 per SP/RoT", +// which in a single fully populated rack works out to (32 sleds + 2 +// switches + 1 psc) * 4 = 140. +// +// This feels bad and probably needs more thought; see +// https://github.com/oxidecomputer/omicron/issues/4621 where this limit +// being too low bit us, and it will link to a more general followup +// issue. 
+const INVENTORY_COLLECTION_LIMIT: u32 = 200; + impl super::Nexus { pub(crate) async fn racks_list( &self, @@ -872,17 +887,7 @@ impl super::Nexus { ) -> ListResultVec { debug!(self.log, "Getting latest collection"); // Grab the SPs from the last collection - // - // We set a limit of 200 here to give us some breathing room when - // querying for cabooses and RoT pages, each of which is "4 per SP/RoT", - // which in a single fully populated rack works out to (32 sleds + 2 - // switches + 1 psc) * 4 = 140. - // - // This feels bad and probably needs more thought; see - // https://github.com/oxidecomputer/omicron/issues/4621 where this limit - // being too low bit us, and it will link to a more general followup - // issue. - let limit = NonZeroU32::new(200).unwrap(); + let limit = NonZeroU32::new(INVENTORY_COLLECTION_LIMIT).unwrap(); let collection = self .db_datastore .inventory_get_latest_collection(opctx, limit) @@ -933,16 +938,18 @@ impl super::Nexus { } /// Add a sled to an intialized rack - pub(crate) async fn add_sled_to_initialized_rack( + pub(crate) async fn sled_add( &self, opctx: &OpContext, - sled: UninitializedSled, + sled: UninitializedSledId, ) -> Result<(), Error> { - let baseboard_id = sled.baseboard.clone().into(); - let hw_baseboard_id = - self.db_datastore.find_hw_baseboard_id(opctx, baseboard_id).await?; + let baseboard_id = sled.clone().into(); + let hw_baseboard_id = self + .db_datastore + .find_hw_baseboard_id(opctx, &baseboard_id) + .await?; - let subnet = self.db_datastore.rack_subnet(opctx, sled.rack_id).await?; + let subnet = self.db_datastore.rack_subnet(opctx, self.rack_id).await?; let rack_subnet = Ipv6Subnet::::from(rack_subnet(Some(subnet))?); @@ -950,16 +957,39 @@ impl super::Nexus { .db_datastore .allocate_sled_underlay_subnet_octets( opctx, - sled.rack_id, + self.rack_id, hw_baseboard_id, ) .await?; + // Grab the SPs from the last collection + let limit = NonZeroU32::new(INVENTORY_COLLECTION_LIMIT).unwrap(); + let collection = self + .db_datastore + .inventory_get_latest_collection(opctx, limit) + .await?; + + // If there isn't a collection, we don't know about the sled + let Some(collection) = collection else { + return Err(Error::unavail("no inventory data available")); + }; + + // Find the revision + let Some(sp) = collection.sps.get(&baseboard_id) else { + return Err(Error::ObjectNotFound { + type_name: ResourceType::Sled, + lookup_type: + omicron_common::api::external::LookupType::ByCompositeId( + format!("{sled:?}"), + ), + }); + }; + // Convert the baseboard as necessary let baseboard = sled_agent_client::types::Baseboard::Gimlet { - identifier: sled.baseboard.serial.clone(), - model: sled.baseboard.part.clone(), - revision: sled.baseboard.revision, + identifier: sled.serial.clone(), + model: sled.part.clone(), + revision: sp.baseboard_revision.into(), }; // Make the call to sled-agent @@ -985,13 +1015,11 @@ impl super::Nexus { }, }; let sa = self.get_any_sled_agent(opctx).await?; - sa.add_sled_to_initialized_rack(&req).await.map_err(|e| { - Error::InternalError { - internal_message: format!( - "failed to add sled with baseboard {:?} to rack {}: {e}", - sled.baseboard, allocation.rack_id - ), - } + sa.sled_add(&req).await.map_err(|e| Error::InternalError { + internal_message: format!( + "failed to add sled with baseboard {:?} to rack {}: {e}", + sled, allocation.rack_id + ), })?; Ok(()) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 3e38558760..dde641a4ad 100644 --- 
a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -6,7 +6,7 @@ use super::{ console_api, device_auth, params, - params::ProjectSelector, + params::{ProjectSelector, UninitializedSledId}, shared::UninitializedSled, views::{ self, Certificate, Group, IdentityProvider, Image, IpPool, IpPoolRange, @@ -228,7 +228,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(switch_list)?; api.register(switch_view)?; api.register(sled_list_uninitialized)?; - api.register(add_sled_to_initialized_rack)?; + api.register(sled_add)?; api.register(user_builtin_list)?; api.register(user_builtin_view)?; @@ -4689,15 +4689,15 @@ async fn sled_list_uninitialized( path = "/v1/system/hardware/sleds/", tags = ["system/hardware"] }] -async fn add_sled_to_initialized_rack( +async fn sled_add( rqctx: RequestContext>, - sled: TypedBody, + sled: TypedBody, ) -> Result { let apictx = rqctx.context(); let nexus = &apictx.nexus; let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - nexus.add_sled_to_initialized_rack(&opctx, sled.into_inner()).await?; + nexus.sled_add(&opctx, sled.into_inner()).await?; Ok(HttpResponseUpdatedNoContent()) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index be0ea2a3f5..8708083124 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -20,10 +20,8 @@ use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils::SWITCH_UUID; use nexus_types::external_api::params; use nexus_types::external_api::shared; -use nexus_types::external_api::shared::Baseboard; use nexus_types::external_api::shared::IpRange; use nexus_types::external_api::shared::Ipv4Range; -use nexus_types::external_api::shared::UninitializedSled; use omicron_common::api::external::AddressLotKind; use omicron_common::api::external::ByteCount; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -41,7 +39,6 @@ use once_cell::sync::Lazy; use std::net::IpAddr; use std::net::Ipv4Addr; use std::str::FromStr; -use uuid::Uuid; pub static HARDWARE_RACK_URL: Lazy = Lazy::new(|| format!("/v1/system/hardware/racks/{}", RACK_UUID)); @@ -69,15 +66,10 @@ pub static HARDWARE_SLED_DISK_URL: Lazy = Lazy::new(|| { pub static SLED_INSTANCES_URL: Lazy = Lazy::new(|| { format!("/v1/system/hardware/sleds/{}/instances", SLED_AGENT_UUID) }); -pub static DEMO_UNINITIALIZED_SLED: Lazy = - Lazy::new(|| UninitializedSled { - baseboard: Baseboard { - serial: "demo-serial".to_string(), - part: "demo-part".to_string(), - revision: 6, - }, - rack_id: Uuid::new_v4(), - cubby: 1, +pub static DEMO_UNINITIALIZED_SLED: Lazy = + Lazy::new(|| params::UninitializedSledId { + serial: "demo-serial".to_string(), + part: "demo-part".to_string(), }); // Global policy diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 10e7df7286..b607bbf1f3 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -117,13 +117,13 @@ snapshot_view GET /v1/snapshots/{snapshot} API operations found with tag "system/hardware" OPERATION ID METHOD URL PATH -add_sled_to_initialized_rack POST /v1/system/hardware/sleds networking_switch_port_apply_settings POST /v1/system/hardware/switch-port/{port}/settings networking_switch_port_clear_settings DELETE /v1/system/hardware/switch-port/{port}/settings networking_switch_port_list GET 
/v1/system/hardware/switch-port physical_disk_list GET /v1/system/hardware/disks rack_list GET /v1/system/hardware/racks rack_view GET /v1/system/hardware/racks/{rack_id} +sled_add POST /v1/system/hardware/sleds sled_instance_list GET /v1/system/hardware/sleds/{sled_id}/instances sled_list GET /v1/system/hardware/sleds sled_list_uninitialized GET /v1/system/hardware/sleds-uninitialized diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index df399e310c..6749794a9a 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -47,6 +47,23 @@ macro_rules! id_path_param { }; } +/// The unique hardware ID for a sled +#[derive( + Clone, + Debug, + Serialize, + Deserialize, + JsonSchema, + PartialOrd, + Ord, + PartialEq, + Eq, +)] +pub struct UninitializedSledId { + pub serial: String, + pub part: String, +} + path_param!(ProjectPath, project, "project"); path_param!(InstancePath, instance, "instance"); path_param!(NetworkInterfacePath, interface, "network interface"); diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 9401727162..77bc73306d 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use strum::EnumIter; use uuid::Uuid; +use crate::external_api::params::UninitializedSledId; use crate::external_api::shared::Baseboard; /// Results of collecting hardware/software inventory from various Omicron @@ -139,6 +140,12 @@ impl From for BaseboardId { } } +impl From for BaseboardId { + fn from(value: UninitializedSledId) -> Self { + BaseboardId { part_number: value.part, serial_number: value.serial } + } +} + /// Caboose contents found during a collection /// /// These are normalized in the database. 
Each distinct `Caboose` is assigned a diff --git a/openapi/nexus.json b/openapi/nexus.json index 4131460149..815cc399ae 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -3765,12 +3765,12 @@ "system/hardware" ], "summary": "Add a sled to an initialized rack", - "operationId": "add_sled_to_initialized_rack", + "operationId": "sled_add", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UninitializedSled" + "$ref": "#/components/schemas/UninitializedSledId" } } }, @@ -14909,6 +14909,22 @@ "rack_id" ] }, + "UninitializedSledId": { + "description": "The unique hardware ID for a sled", + "type": "object", + "properties": { + "part": { + "type": "string" + }, + "serial": { + "type": "string" + } + }, + "required": [ + "part", + "serial" + ] + }, "UninitializedSledResultsPage": { "description": "A single page of results", "type": "object", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 6076df6dbb..467fd32cb8 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -576,7 +576,7 @@ "/sleds": { "put": { "summary": "Add a sled to a rack that was already initialized via RSS", - "operationId": "add_sled_to_initialized_rack", + "operationId": "sled_add", "requestBody": { "content": { "application/json": { diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 8c8a5f2a03..26a0d2ddc2 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -77,7 +77,7 @@ pub fn api() -> SledApiDescription { api.register(uplink_ensure)?; api.register(read_network_bootstore_config_cache)?; api.register(write_network_bootstore_config)?; - api.register(add_sled_to_initialized_rack)?; + api.register(sled_add)?; api.register(metrics_collect)?; api.register(host_os_write_start)?; api.register(host_os_write_status_get)?; @@ -713,7 +713,7 @@ async fn write_network_bootstore_config( method = PUT, path = "/sleds" }] -async fn add_sled_to_initialized_rack( +async fn sled_add( rqctx: RequestContext, body: TypedBody, ) -> Result { @@ -731,7 +731,7 @@ async fn add_sled_to_initialized_rack( )); } - crate::sled_agent::add_sled_to_initialized_rack( + crate::sled_agent::sled_add( sa.logger().clone(), request.sled_id, request.start_request, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5f278b7f38..621d003268 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1105,7 +1105,7 @@ pub enum AddSledError { } /// Add a sled to an initialized rack. -pub async fn add_sled_to_initialized_rack( +pub async fn sled_add( log: Logger, sled_id: Baseboard, request: StartSledAgentRequest, From e760630902e3bd5e2e0a0a5e4518ffa3a0d05df3 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Sat, 30 Dec 2023 12:46:21 -0800 Subject: [PATCH 127/186] Added dtrace_user and dtrace_proc permissions for oxz_ zones (#4736) This allows dtrace inside the oxz_ zones created by Omicron. Partial fix for https://github.com/oxidecomputer/omicron/issues/4731 Co-authored-by: Alan Hanson --- sled-agent/src/services.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index a9000a1c4b..ddfea5d596 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1260,11 +1260,14 @@ impl ServiceManager { // Check the services intended to run in the zone to determine whether any // additional privileges need to be enabled for the zone. 
fn privs_needed(zone_args: &ZoneArgs<'_>) -> Vec<String> { - let mut needed = Vec::new(); + let mut needed = vec![ + "default".to_string(), + "dtrace_user".to_string(), + "dtrace_proc".to_string(), + ]; for svc_details in zone_args.sled_local_services() { match svc_details { SwitchService::Tfport { .. } => { - needed.push("default".to_string()); needed.push("sys_dl_config".to_string()); } _ => (), @@ -1275,7 +1278,6 @@ impl ServiceManager { match omicron_zone_type { OmicronZoneType::BoundaryNtp { .. } | OmicronZoneType::InternalNtp { .. } => { - needed.push("default".to_string()); needed.push("sys_time".to_string()); needed.push("proc_priocntl".to_string()); } From 44fea0d0acdb90f9fe55a8ae14294abe6890c0b7 Mon Sep 17 00:00:00 2001 From: Adam Leventhal Date: Sat, 30 Dec 2023 15:15:56 -0800 Subject: [PATCH 128/186] update schemars; address duplicate type names (#4737) --- Cargo.lock | 8 +- Cargo.toml | 2 +- .../src/db/datastore/switch_port.rs | 4 +- nexus/src/app/rack.rs | 12 +- nexus/tests/integration_tests/switch_port.rs | 12 +- nexus/types/src/external_api/params.rs | 18 +-- openapi/nexus.json | 149 +++++++++++++++++- workspace-hack/Cargo.toml | 4 +- 8 files changed, 174 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3cdf3dd678..85e42458d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7245,9 +7245,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.13" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763f8cd0d4c71ed8389c90cb8100cba87e763bd01a8e614d4f0af97bcd50a161" +checksum = "45a28f4c49489add4ce10783f7911893516f15afe45d015608d41faca6bc4d29" dependencies = [ "bytes", "chrono", @@ -7260,9 +7260,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.13" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0f696e21e10fa546b7ffb1c9672c6de8fbc7a81acf59524386d8639bf12737" +checksum = "c767fd6fa65d9ccf9cf026122c1b555f2ef9a4f0cea69da4d7dbc3e258d30967" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index d4f81b0310..f7256ce8b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -314,7 +314,7 @@ rustfmt-wrapper = "0.2" rustls = "0.21.9" rustyline = "12.0.0" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } -schemars = "0.8.12" +schemars = "0.8.16" secrecy = "0.8.0" semver = { version = "1.0.20", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index 221feee23c..4771768e43 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -1196,7 +1196,7 @@ mod test { use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params::{ BgpAnnounceSetCreate, BgpConfigCreate, BgpPeer, BgpPeerConfig, - SwitchPortConfig, SwitchPortGeometry, SwitchPortSettingsCreate, + SwitchPortConfigCreate, SwitchPortGeometry, SwitchPortSettingsCreate, }; use omicron_common::api::external::{ IdentityMetadataCreateParams, Name, NameOrId, @@ -1250,7 +1250,7 @@ mod test { name: "test-settings".parse().unwrap(), description: "test settings".into(), }, - port_config: SwitchPortConfig { + port_config: SwitchPortConfigCreate { geometry: SwitchPortGeometry::Qsfp28x1, }, groups: Vec::new(), diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index
a0dcb7fcb1..23ee39415f 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -27,10 +27,10 @@ use nexus_types::external_api::params::BgpAnnounceSetCreate; use nexus_types::external_api::params::BgpAnnouncementCreate; use nexus_types::external_api::params::BgpConfigCreate; use nexus_types::external_api::params::BgpPeer; -use nexus_types::external_api::params::LinkConfig; -use nexus_types::external_api::params::LldpServiceConfig; +use nexus_types::external_api::params::LinkConfigCreate; +use nexus_types::external_api::params::LldpServiceConfigCreate; use nexus_types::external_api::params::RouteConfig; -use nexus_types::external_api::params::SwitchPortConfig; +use nexus_types::external_api::params::SwitchPortConfigCreate; use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::params::{ AddressLotCreate, BgpPeerConfig, LoopbackAddressCreate, Route, SiloCreate, @@ -587,7 +587,7 @@ impl super::Nexus { description: "initial uplink configuration".to_string(), }; - let port_config = SwitchPortConfig { + let port_config = SwitchPortConfigCreate { geometry: nexus_types::external_api::params::SwitchPortGeometry::Qsfp28x1, }; @@ -653,9 +653,9 @@ impl super::Nexus { .bgp_peers .insert("phy0".to_string(), BgpPeerConfig { peers }); - let link = LinkConfig { + let link = LinkConfigCreate { mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 - lldp: LldpServiceConfig { + lldp: LldpServiceConfigCreate { enabled: false, lldp_config: None, }, diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index df4d96c6d1..c6e774be09 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -11,9 +11,9 @@ use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params::{ Address, AddressConfig, AddressLotBlockCreate, AddressLotCreate, BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate, BgpPeer, - BgpPeerConfig, LinkConfig, LinkFec, LinkSpeed, LldpServiceConfig, Route, - RouteConfig, SwitchInterfaceConfig, SwitchInterfaceKind, - SwitchPortApplySettings, SwitchPortSettingsCreate, + BgpPeerConfig, LinkConfigCreate, LinkFec, LinkSpeed, + LldpServiceConfigCreate, Route, RouteConfig, SwitchInterfaceConfigCreate, + SwitchInterfaceKind, SwitchPortApplySettings, SwitchPortSettingsCreate, }; use nexus_types::external_api::views::Rack; use omicron_common::api::external::{ @@ -113,9 +113,9 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { // links settings.links.insert( "phy0".into(), - LinkConfig { + LinkConfigCreate { mtu: 4700, - lldp: LldpServiceConfig { enabled: false, lldp_config: None }, + lldp: LldpServiceConfigCreate { enabled: false, lldp_config: None }, fec: LinkFec::None, speed: LinkSpeed::Speed100G, autoneg: false, @@ -124,7 +124,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { // interfaces settings.interfaces.insert( "phy0".into(), - SwitchInterfaceConfig { + SwitchInterfaceConfigCreate { v6_enabled: true, kind: SwitchInterfaceKind::Primary, }, diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 6749794a9a..209d1f607c 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1387,14 +1387,14 @@ pub struct SwtichPortSettingsGroupCreate { pub struct SwitchPortSettingsCreate { #[serde(flatten)] pub identity: IdentityMetadataCreateParams, - pub port_config: SwitchPortConfig, + pub 
port_config: SwitchPortConfigCreate, pub groups: Vec, /// Links indexed by phy name. On ports that are not broken out, this is /// always phy0. On a 2x breakout the options are phy0 and phy1, on 4x /// phy0-phy3, etc. - pub links: HashMap, + pub links: HashMap, /// Interfaces indexed by link name. - pub interfaces: HashMap, + pub interfaces: HashMap, /// Routes indexed by interface name. pub routes: HashMap, /// BGP peers indexed by interface name. @@ -1407,7 +1407,7 @@ impl SwitchPortSettingsCreate { pub fn new(identity: IdentityMetadataCreateParams) -> Self { Self { identity, - port_config: SwitchPortConfig { + port_config: SwitchPortConfigCreate { geometry: SwitchPortGeometry::Qsfp28x1, }, groups: Vec::new(), @@ -1423,7 +1423,7 @@ impl SwitchPortSettingsCreate { /// Physical switch port configuration. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case")] -pub struct SwitchPortConfig { +pub struct SwitchPortConfigCreate { /// Link geometry for the switch port. pub geometry: SwitchPortGeometry, } @@ -1526,12 +1526,12 @@ impl From for LinkSpeed { /// Switch link configuration. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct LinkConfig { +pub struct LinkConfigCreate { /// Maximum transmission unit for the link. pub mtu: u16, /// The link-layer discovery protocol (LLDP) configuration for the link. - pub lldp: LldpServiceConfig, + pub lldp: LldpServiceConfigCreate, /// The forward error correction mode of the link. pub fec: LinkFec, @@ -1546,7 +1546,7 @@ pub struct LinkConfig { /// The LLDP configuration associated with a port. LLDP may be either enabled or /// disabled, if enabled, an LLDP configuration must be provided by name or id. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct LldpServiceConfig { +pub struct LldpServiceConfigCreate { /// Whether or not LLDP is enabled. pub enabled: bool, @@ -1558,7 +1558,7 @@ pub struct LldpServiceConfig { /// A layer-3 switch interface configuration. When IPv6 is enabled, a link local /// address will be created for the interface. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SwitchInterfaceConfig { +pub struct SwitchInterfaceConfigCreate { /// Whether or not IPv6 is enabled. pub v6_enabled: bool, diff --git a/openapi/nexus.json b/openapi/nexus.json index 815cc399ae..3b8b3525dc 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -12496,7 +12496,7 @@ "minLength": 1, "maxLength": 11 }, - "LinkConfig": { + "LinkConfigCreate": { "description": "Switch link configuration.", "type": "object", "properties": { @@ -12516,7 +12516,7 @@ "description": "The link-layer discovery protocol (LLDP) configuration for the link.", "allOf": [ { - "$ref": "#/components/schemas/LldpServiceConfig" + "$ref": "#/components/schemas/LldpServiceConfigCreate" } ] }, @@ -12638,6 +12638,31 @@ ] }, "LldpServiceConfig": { + "description": "A link layer discovery protocol (LLDP) service configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Whether or not the LLDP service is enabled.", + "type": "boolean" + }, + "id": { + "description": "The id of this LLDP service instance.", + "type": "string", + "format": "uuid" + }, + "lldp_config_id": { + "nullable": true, + "description": "The link-layer discovery protocol configuration for this service.", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "enabled", + "id" + ] + }, + "LldpServiceConfigCreate": { "description": "The LLDP configuration associated with a port. 
LLDP may be either enabled or disabled, if enabled, an LLDP configuration must be provided by name or id.", "type": "object", "properties": { @@ -14251,6 +14276,45 @@ ] }, "SwitchInterfaceConfig": { + "description": "A switch port interface configuration for a port settings object.", + "type": "object", + "properties": { + "id": { + "description": "A unique identifier for this switch interface.", + "type": "string", + "format": "uuid" + }, + "interface_name": { + "description": "The name of this switch interface.", + "type": "string" + }, + "kind": { + "description": "The switch interface kind.", + "allOf": [ + { + "$ref": "#/components/schemas/SwitchInterfaceKind2" + } + ] + }, + "port_settings_id": { + "description": "The port settings object this switch interface configuration belongs to.", + "type": "string", + "format": "uuid" + }, + "v6_enabled": { + "description": "Whether or not IPv6 is enabled on this interface.", + "type": "boolean" + } + }, + "required": [ + "id", + "interface_name", + "kind", + "port_settings_id", + "v6_enabled" + ] + }, + "SwitchInterfaceConfigCreate": { "description": "A layer-3 switch interface configuration. When IPv6 is enabled, a link local address will be created for the interface.", "type": "object", "properties": { @@ -14329,6 +14393,32 @@ } ] }, + "SwitchInterfaceKind2": { + "description": "Describes the kind of an switch interface.", + "oneOf": [ + { + "description": "Primary interfaces are associated with physical links. There is exactly one primary interface per physical link.", + "type": "string", + "enum": [ + "primary" + ] + }, + { + "description": "VLAN interfaces allow physical interfaces to be multiplexed onto multiple logical links, each distinguished by a 12-bit 802.1Q Ethernet tag.", + "type": "string", + "enum": [ + "vlan" + ] + }, + { + "description": "Loopback interfaces are anchors for IP addresses that are not specific to any particular port.", + "type": "string", + "enum": [ + "loopback" + ] + } + ] + }, "SwitchLocation": { "description": "Identifies switch physical location", "oneOf": [ @@ -14467,6 +14557,29 @@ ] }, "SwitchPortConfig": { + "description": "A physical port configuration for a port settings object.", + "type": "object", + "properties": { + "geometry": { + "description": "The physical link geometry of the port.", + "allOf": [ + { + "$ref": "#/components/schemas/SwitchPortGeometry2" + } + ] + }, + "port_settings_id": { + "description": "The id of the port settings object this configuration belongs to.", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "geometry", + "port_settings_id" + ] + }, + "SwitchPortConfigCreate": { "description": "Physical switch port configuration.", "type": "object", "properties": { @@ -14509,6 +14622,32 @@ } ] }, + "SwitchPortGeometry2": { + "description": "The link geometry associated with a switch port.", + "oneOf": [ + { + "description": "The port contains a single QSFP28 link with four lanes.", + "type": "string", + "enum": [ + "qsfp28x1" + ] + }, + { + "description": "The port contains two QSFP28 links each with two lanes.", + "type": "string", + "enum": [ + "qsfp28x2" + ] + }, + { + "description": "The port contains four SFP28 links each with one lane.", + "type": "string", + "enum": [ + "sfp28x4" + ] + } + ] + }, "SwitchPortLinkConfig": { "description": "A link configuration for a port settings object.", "type": "object", @@ -14677,21 +14816,21 @@ "description": "Interfaces indexed by link name.", "type": "object", "additionalProperties": { - "$ref": 
"#/components/schemas/SwitchInterfaceConfig" + "$ref": "#/components/schemas/SwitchInterfaceConfigCreate" } }, "links": { "description": "Links indexed by phy name. On ports that are not broken out, this is always phy0. On a 2x breakout the options are phy0 and phy1, on 4x phy0-phy3, etc.", "type": "object", "additionalProperties": { - "$ref": "#/components/schemas/LinkConfig" + "$ref": "#/components/schemas/LinkConfigCreate" } }, "name": { "$ref": "#/components/schemas/Name" }, "port_config": { - "$ref": "#/components/schemas/SwitchPortConfig" + "$ref": "#/components/schemas/SwitchPortConfigCreate" }, "routes": { "description": "Routes indexed by interface name.", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 8998f7594b..653e8b370a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -85,7 +85,7 @@ regex-automata = { version = "0.4.3", default-features = false, features = ["dfa regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } -schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } +schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.193", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } @@ -188,7 +188,7 @@ regex-automata = { version = "0.4.3", default-features = false, features = ["dfa regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } -schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } +schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.193", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } From 9274beb0112e50014e8f14cba2f3fbec17a64421 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 31 Dec 2023 05:20:06 +0000 Subject: [PATCH 129/186] Update taiki-e/install-action digest to 56ab793 (#4738) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`0f94aa2` -> `56ab793`](https://togithub.com/taiki-e/install-action/compare/0f94aa2...56ab793) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index e70b959f8a..4d9812a44e 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@0f94aa2032e24d01f7ae1cc63f71b13418365efd # v2 + uses: taiki-e/install-action@56ab7930c591507f833cbaed864d201386d518a8 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From cfcf209c4f4b278bbfe27d19c115d657c63b5d76 Mon Sep 17 00:00:00 2001 From: Benjamin Leonard Date: Tue, 2 Jan 2024 15:59:33 +0000 Subject: [PATCH 130/186] Fix endpoint description casing (#4742) Noticed some inconsistencies with the endpoint descriptions. Fixes https://github.com/oxidecomputer/docs/issues/253 --- nexus/src/external_api/http_entrypoints.rs | 14 +++++++------- openapi/nexus.json | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index dde641a4ad..5ac782aee6 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -1294,7 +1294,7 @@ async fn project_policy_update( // IP Pools -/// List all IP Pools that can be used by a given project. +/// List all IP pools that can be used by a given project #[endpoint { method = GET, path = "/v1/ip-pools", @@ -1452,7 +1452,7 @@ async fn ip_pool_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Delete an IP Pool +/// Delete an IP pool #[endpoint { method = DELETE, path = "/v1/system/ip-pools/{pool}", @@ -1474,7 +1474,7 @@ async fn ip_pool_delete( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Update an IP Pool +/// Update an IP pool #[endpoint { method = PUT, path = "/v1/system/ip-pools/{pool}", @@ -1701,7 +1701,7 @@ async fn ip_pool_service_range_remove( // Floating IP Addresses -/// List all Floating IPs +/// List all floating IPs #[endpoint { method = GET, path = "/v1/floating-ips", @@ -1733,7 +1733,7 @@ async fn floating_ip_list( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Create a Floating IP +/// Create a floating IP #[endpoint { method = POST, path = "/v1/floating-ips", @@ -1759,7 +1759,7 @@ async fn floating_ip_create( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Delete a Floating IP +/// Delete a floating IP #[endpoint { method = DELETE, path = "/v1/floating-ips/{floating_ip}", @@ -4757,7 +4757,7 @@ async fn sled_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Set the sled's provision state. 
+/// Set the sled's provision state #[endpoint { method = PUT, path = "/v1/system/hardware/sleds/{sled_id}/provision-state", diff --git a/openapi/nexus.json b/openapi/nexus.json index 3b8b3525dc..f2433e5512 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -809,7 +809,7 @@ "tags": [ "floating-ips" ], - "summary": "List all Floating IPs", + "summary": "List all floating IPs", "operationId": "floating_ip_list", "parameters": [ { @@ -876,7 +876,7 @@ "tags": [ "floating-ips" ], - "summary": "Create a Floating IP", + "summary": "Create a floating IP", "operationId": "floating_ip_create", "parameters": [ { @@ -968,7 +968,7 @@ "tags": [ "floating-ips" ], - "summary": "Delete a Floating IP", + "summary": "Delete a floating IP", "operationId": "floating_ip_delete", "parameters": [ { @@ -2154,7 +2154,7 @@ "tags": [ "projects" ], - "summary": "List all IP Pools that can be used by a given project.", + "summary": "List all IP pools that can be used by a given project", "operationId": "project_ip_pool_list", "parameters": [ { @@ -3971,7 +3971,7 @@ "tags": [ "system/hardware" ], - "summary": "Set the sled's provision state.", + "summary": "Set the sled's provision state", "operationId": "sled_set_provision_state", "parameters": [ { @@ -4783,7 +4783,7 @@ "tags": [ "system/networking" ], - "summary": "Update an IP Pool", + "summary": "Update an IP pool", "operationId": "ip_pool_update", "parameters": [ { @@ -4829,7 +4829,7 @@ "tags": [ "system/networking" ], - "summary": "Delete an IP Pool", + "summary": "Delete an IP pool", "operationId": "ip_pool_delete", "parameters": [ { From b984facc5c83e8136a56d18dc2370ab7a4e360dd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 3 Jan 2024 10:37:25 -0800 Subject: [PATCH 131/186] Optimize OID lookup for user-defined types (#4735) # Background Looking up OIDs for ENUMs consumes a non-trivial amount of time during initialization, since they are synchronously fetched one-by-one. Although this is amortized for long-lived connections, it incurs a cost on all tests using CRDB, and can be optimized. Additionally, these OID caches are per-connection, which incurs redundant work when we have a connection pool (and we do, with bb8). See #4733 for additional context. Feedback from the Diesel maintainers: > I think it would be reasonable to share this information at pool level... > > [OIDs are] currently queried by one type at the time as we argued that it won't matter for long running applications. There you do the overhead once per connection and then you just hit the cache. I don't think we did really consider short running applications (like tests) for that. # This PR 1. Performs a one-time lookup of user-defined ENUM OIDs on the first connection, and shares it between subsequent connections. This pre-populates the cache used by Diesel. 2. Modifies all `SqlType` derives to ensure that the `schema` is specified as `public` (this ensures a cache hit) 3. Adds a test to compare the "list of user-defined enums to look up" with the current DB schema, and ensures they're in-sync. # Results I focused on optimizing `integration_tests::basic::test_ping`, as this is very close to a "no-op" version of the `#[nexus_test]`. I wrote a script to repeatedly execute this test in a loop (excluding the first result, as a "warm-up" run to populate CRDB).
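The script itself is not part of the patch; a rough Rust equivalent of that loop (package name and test filter are illustrative):

```rust
use std::process::Command;
use std::time::Instant;

fn run_once() {
    let status = Command::new("cargo")
        .args(["test", "-p", "omicron-nexus", "--", "integration_tests::basic::test_ping"])
        .status()
        .expect("failed to spawn cargo");
    assert!(status.success());
}

fn main() {
    run_once(); // warm-up run, excluded from the average
    let n = 10;
    let mut total = 0.0;
    for _ in 0..n {
        let start = Instant::now();
        run_once();
        total += start.elapsed().as_secs_f64();
    }
    println!("average over {n} runs: {:.2}s", total / n as f64);
}
```

The measured averages follow.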
- Before: Average time over 10 runs: 10.42 seconds - After: Average time over 10 runs: 9.47 seconds That's a small difference, but hopefully it'll provide a dent over "all our tests", and avoid additional user-defined types from slowing down all tests. Part of https://github.com/oxidecomputer/omicron/issues/4733 --- Cargo.lock | 1 + nexus/db-model/src/address_lot.rs | 2 +- nexus/db-model/src/block_size.rs | 2 +- nexus/db-model/src/dataset_kind.rs | 2 +- nexus/db-model/src/dns.rs | 2 +- nexus/db-model/src/external_ip.rs | 2 +- nexus/db-model/src/identity_provider.rs | 2 +- nexus/db-model/src/instance_state.rs | 2 +- nexus/db-model/src/inventory.rs | 10 +- nexus/db-model/src/network_interface.rs | 2 +- nexus/db-model/src/physical_disk_kind.rs | 2 +- nexus/db-model/src/producer_endpoint.rs | 2 +- nexus/db-model/src/role_assignment.rs | 2 +- nexus/db-model/src/saga_types.rs | 2 +- nexus/db-model/src/service_kind.rs | 2 +- nexus/db-model/src/silo.rs | 4 +- nexus/db-model/src/sled_provision_state.rs | 2 +- nexus/db-model/src/sled_resource_kind.rs | 2 +- nexus/db-model/src/snapshot.rs | 2 +- nexus/db-model/src/switch_interface.rs | 2 +- nexus/db-model/src/switch_port.rs | 6 +- nexus/db-model/src/system_update.rs | 4 +- nexus/db-model/src/update_artifact.rs | 2 +- nexus/db-model/src/vpc_firewall_rule.rs | 8 +- nexus/db-model/src/vpc_route.rs | 2 +- nexus/db-model/src/vpc_router.rs | 2 +- nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/db/mod.rs | 1 + nexus/db-queries/src/db/pool.rs | 31 +-- nexus/db-queries/src/db/pool_connection.rs | 303 +++++++++++++++++++++ 30 files changed, 346 insertions(+), 63 deletions(-) create mode 100644 nexus/db-queries/src/db/pool_connection.rs diff --git a/Cargo.lock b/Cargo.lock index 85e42458d4..440e2fe296 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4186,6 +4186,7 @@ dependencies = [ "pem", "petgraph", "pq-sys", + "pretty_assertions", "rand 0.8.5", "rcgen", "ref-cast", diff --git a/nexus/db-model/src/address_lot.rs b/nexus/db-model/src/address_lot.rs index de5a4654c5..4fef2466e6 100644 --- a/nexus/db-model/src/address_lot.rs +++ b/nexus/db-model/src/address_lot.rs @@ -13,7 +13,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, Clone, Copy)] - #[diesel(postgres_type(name = "address_lot_kind"))] + #[diesel(postgres_type(name = "address_lot_kind", schema = "public"))] pub struct AddressLotKindEnum; #[derive( diff --git a/nexus/db-model/src/block_size.rs b/nexus/db-model/src/block_size.rs index 1a090f1e44..c947f85388 100644 --- a/nexus/db-model/src/block_size.rs +++ b/nexus/db-model/src/block_size.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "block_size"))] + #[diesel(postgres_type(name = "block_size", schema = "public"))] pub struct BlockSizeEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/dataset_kind.rs b/nexus/db-model/src/dataset_kind.rs index d068f48fd3..00317592e8 100644 --- a/nexus/db-model/src/dataset_kind.rs +++ b/nexus/db-model/src/dataset_kind.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "dataset_kind"))] + #[diesel(postgres_type(name = "dataset_kind", schema = "public"))] pub struct DatasetKindEnum; #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/dns.rs b/nexus/db-model/src/dns.rs index 
6b37362c42..56dd1e0547 100644 --- a/nexus/db-model/src/dns.rs +++ b/nexus/db-model/src/dns.rs @@ -16,7 +16,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "dns_group"))] + #[diesel(postgres_type(name = "dns_group", schema = "public"))] pub struct DnsGroupEnum; #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 1a755f0396..6b3f8d5110 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -29,7 +29,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, Clone, Copy, QueryId)] - #[diesel(postgres_type(name = "ip_kind"))] + #[diesel(postgres_type(name = "ip_kind", schema = "public"))] pub struct IpKindEnum; #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] diff --git a/nexus/db-model/src/identity_provider.rs b/nexus/db-model/src/identity_provider.rs index 6bc55b3220..869d64bc7e 100644 --- a/nexus/db-model/src/identity_provider.rs +++ b/nexus/db-model/src/identity_provider.rs @@ -13,7 +13,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "provider_type"))] + #[diesel(postgres_type(name = "provider_type", schema = "public"))] pub struct IdentityProviderTypeEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/instance_state.rs b/nexus/db-model/src/instance_state.rs index 6b4c71da79..7b98850b43 100644 --- a/nexus/db-model/src/instance_state.rs +++ b/nexus/db-model/src/instance_state.rs @@ -11,7 +11,7 @@ use std::io::Write; impl_enum_wrapper!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "instance_state"))] + #[diesel(postgres_type(name = "instance_state", schema = "public"))] pub struct InstanceStateEnum; #[derive(Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)] diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index d94334787d..72671fde98 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -26,7 +26,7 @@ use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "hw_power_state"))] + #[diesel(postgres_type(name = "hw_power_state", schema = "public"))] pub struct HwPowerStateEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] @@ -62,7 +62,7 @@ impl From for PowerState { // See [`nexus_types::inventory::RotSlot`]. impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "hw_rot_slot"))] + #[diesel(postgres_type(name = "hw_rot_slot", schema = "public"))] pub struct HwRotSlotEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] @@ -95,7 +95,7 @@ impl From for RotSlot { // See [`nexus_types::inventory::CabooseWhich`]. impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "caboose_which"))] + #[diesel(postgres_type(name = "caboose_which", schema = "public"))] pub struct CabooseWhichEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] @@ -136,7 +136,7 @@ impl From for nexus_types::inventory::CabooseWhich { // See [`nexus_types::inventory::RotPageWhich`]. 
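A note on why the `schema = "public"` attribute matters (reasoning reconstructed from the PR description above): the pre-populated cache entries are keyed by (schema, type name), so a derive without an explicit schema yields a schema-less lookup key that misses the warm cache. A sketch using the key type that appears later in this patch:

```rust
use diesel::pg::PgMetadataCacheKey;

fn cache_keys() -> (PgMetadataCacheKey<'static>, PgMetadataCacheKey<'static>) {
    // Hit: the shape of key this patch pre-populates.
    let hit = PgMetadataCacheKey::new(Some("public".into()), "hw_power_state".into());
    // Miss: a schema-less key, which is what a derive without `schema = "public"`
    // would look up, bypassing the pre-populated entries.
    let miss = PgMetadataCacheKey::new(None, "hw_power_state".into());
    (hit, miss)
}
```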
impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "root_of_trust_page_which"))] + #[diesel(postgres_type(name = "root_of_trust_page_which", schema = "public"))] pub struct RotPageWhichEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] @@ -189,7 +189,7 @@ impl From for nexus_types::inventory::RotPageWhich { // See [`nexus_types::inventory::SpType`]. impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "sp_type"))] + #[diesel(postgres_type(name = "sp_type", schema = "public"))] pub struct SpTypeEnum; #[derive( diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index ada2148516..3d3fabbe66 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -19,7 +19,7 @@ use uuid::Uuid; impl_enum_type! { #[derive(SqlType, QueryId, Debug, Clone, Copy)] - #[diesel(postgres_type(name = "network_interface_kind"))] + #[diesel(postgres_type(name = "network_interface_kind", schema = "public"))] pub struct NetworkInterfaceKindEnum; #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] diff --git a/nexus/db-model/src/physical_disk_kind.rs b/nexus/db-model/src/physical_disk_kind.rs index a55d42beef..fe86c801d0 100644 --- a/nexus/db-model/src/physical_disk_kind.rs +++ b/nexus/db-model/src/physical_disk_kind.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; impl_enum_type!( #[derive(Clone, SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "physical_disk_kind"))] + #[diesel(postgres_type(name = "physical_disk_kind", schema = "public"))] pub struct PhysicalDiskKindEnum; #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index f282f6f08f..55533690f1 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -12,7 +12,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Copy, Clone, Debug, QueryId)] - #[diesel(postgres_type(name = "producer_kind"))] + #[diesel(postgres_type(name = "producer_kind", schema = "public"))] pub struct ProducerKindEnum; #[derive(AsExpression, Copy, Clone, Debug, FromSqlRow, PartialEq)] diff --git a/nexus/db-model/src/role_assignment.rs b/nexus/db-model/src/role_assignment.rs index 45b0c65e37..fbbe18579e 100644 --- a/nexus/db-model/src/role_assignment.rs +++ b/nexus/db-model/src/role_assignment.rs @@ -12,7 +12,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "identity_type"))] + #[diesel(postgres_type(name = "identity_type", schema = "public"))] pub struct IdentityTypeEnum; #[derive( diff --git a/nexus/db-model/src/saga_types.rs b/nexus/db-model/src/saga_types.rs index f2a8b57659..bb21e803bc 100644 --- a/nexus/db-model/src/saga_types.rs +++ b/nexus/db-model/src/saga_types.rs @@ -140,7 +140,7 @@ where } #[derive(Clone, Copy, Debug, PartialEq, SqlType)] -#[diesel(postgres_type(name = "saga_state"))] +#[diesel(postgres_type(name = "saga_state", schema = "public"))] pub struct SagaCachedStateEnum; /// Newtype wrapper around [`steno::SagaCachedState`] which implements diff --git a/nexus/db-model/src/service_kind.rs b/nexus/db-model/src/service_kind.rs index 4210c3ee20..016de9c44e 100644 --- a/nexus/db-model/src/service_kind.rs +++ b/nexus/db-model/src/service_kind.rs @@ -10,7 +10,7 @@ use strum::EnumIter; impl_enum_type!( #[derive(Clone, SqlType, Debug, QueryId)] - 
#[diesel(postgres_type(name = "service_kind"))] + #[diesel(postgres_type(name = "service_kind", schema = "public"))] pub struct ServiceKindEnum; #[derive(Clone, Copy, Debug, Eq, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq, EnumIter)] diff --git a/nexus/db-model/src/silo.rs b/nexus/db-model/src/silo.rs index 21d12cd7f1..66520fccb1 100644 --- a/nexus/db-model/src/silo.rs +++ b/nexus/db-model/src/silo.rs @@ -20,7 +20,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "authentication_mode"))] + #[diesel(postgres_type(name = "authentication_mode", schema = "public"))] pub struct AuthenticationModeEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq, Eq)] @@ -52,7 +52,7 @@ impl From for shared::AuthenticationMode { impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "user_provision_type"))] + #[diesel(postgres_type(name = "user_provision_type", schema = "public"))] pub struct UserProvisionTypeEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq, Eq)] diff --git a/nexus/db-model/src/sled_provision_state.rs b/nexus/db-model/src/sled_provision_state.rs index b2b1ee39dc..ada842a32f 100644 --- a/nexus/db-model/src/sled_provision_state.rs +++ b/nexus/db-model/src/sled_provision_state.rs @@ -9,7 +9,7 @@ use thiserror::Error; impl_enum_type!( #[derive(Clone, SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "sled_provision_state"))] + #[diesel(postgres_type(name = "sled_provision_state", schema = "public"))] pub struct SledProvisionStateEnum; #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/sled_resource_kind.rs b/nexus/db-model/src/sled_resource_kind.rs index 1c92431cfa..c17eb2e106 100644 --- a/nexus/db-model/src/sled_resource_kind.rs +++ b/nexus/db-model/src/sled_resource_kind.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; impl_enum_type!( #[derive(Clone, SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "sled_resource_kind"))] + #[diesel(postgres_type(name = "sled_resource_kind", schema = "public"))] pub struct SledResourceKindEnum; #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/snapshot.rs b/nexus/db-model/src/snapshot.rs index 2a93f03f69..6c160e5c6b 100644 --- a/nexus/db-model/src/snapshot.rs +++ b/nexus/db-model/src/snapshot.rs @@ -14,7 +14,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "snapshot_state"))] + #[diesel(postgres_type(name = "snapshot_state", schema = "public"))] pub struct SnapshotStateEnum; #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/switch_interface.rs b/nexus/db-model/src/switch_interface.rs index f0c4b91de6..71673354ea 100644 --- a/nexus/db-model/src/switch_interface.rs +++ b/nexus/db-model/src/switch_interface.rs @@ -14,7 +14,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug, Clone, Copy)] - #[diesel(postgres_type(name = "switch_interface_kind"))] + #[diesel(postgres_type(name = "switch_interface_kind", schema = "public"))] pub struct DbSwitchInterfaceKindEnum; #[derive( diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs index 6ff8612d2f..6ed918dae5 100644 --- a/nexus/db-model/src/switch_port.rs +++ b/nexus/db-model/src/switch_port.rs @@ -23,7 +23,7 @@ use uuid::Uuid; impl_enum_type!( 
#[derive(SqlType, Debug, Clone, Copy)] - #[diesel(postgres_type(name = "switch_port_geometry"))] + #[diesel(postgres_type(name = "switch_port_geometry", schema = "public"))] pub struct SwitchPortGeometryEnum; #[derive( @@ -46,7 +46,7 @@ impl_enum_type!( impl_enum_type!( #[derive(SqlType, Debug, Clone, Copy)] - #[diesel(postgres_type(name = "switch_link_fec"))] + #[diesel(postgres_type(name = "switch_link_fec", schema = "public"))] pub struct SwitchLinkFecEnum; #[derive( @@ -69,7 +69,7 @@ impl_enum_type!( impl_enum_type!( #[derive(SqlType, Debug, Clone, Copy)] - #[diesel(postgres_type(name = "switch_link_speed"))] + #[diesel(postgres_type(name = "switch_link_speed", schema = "public"))] pub struct SwitchLinkSpeedEnum; #[derive( diff --git a/nexus/db-model/src/system_update.rs b/nexus/db-model/src/system_update.rs index c8ae66648e..17421936b1 100644 --- a/nexus/db-model/src/system_update.rs +++ b/nexus/db-model/src/system_update.rs @@ -59,7 +59,7 @@ impl From for views::SystemUpdate { impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "update_status"))] + #[diesel(postgres_type(name = "update_status", schema = "public"))] pub struct UpdateStatusEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] @@ -81,7 +81,7 @@ impl From for views::UpdateStatus { impl_enum_type!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "updateable_component_type"))] + #[diesel(postgres_type(name = "updateable_component_type", schema = "public"))] pub struct UpdateableComponentTypeEnum; #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] diff --git a/nexus/db-model/src/update_artifact.rs b/nexus/db-model/src/update_artifact.rs index 196dd6db4d..97c57b44cc 100644 --- a/nexus/db-model/src/update_artifact.rs +++ b/nexus/db-model/src/update_artifact.rs @@ -14,7 +14,7 @@ use std::io::Write; impl_enum_wrapper!( #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "update_artifact_kind"))] + #[diesel(postgres_type(name = "update_artifact_kind", schema = "public"))] pub struct KnownArtifactKindEnum; #[derive(Clone, Copy, Debug, Display, AsExpression, FromSqlRow, PartialEq, Eq, Serialize, Deserialize)] diff --git a/nexus/db-model/src/vpc_firewall_rule.rs b/nexus/db-model/src/vpc_firewall_rule.rs index 6208d589ff..2d19796524 100644 --- a/nexus/db-model/src/vpc_firewall_rule.rs +++ b/nexus/db-model/src/vpc_firewall_rule.rs @@ -19,7 +19,7 @@ use uuid::Uuid; impl_enum_wrapper!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "vpc_firewall_rule_status"))] + #[diesel(postgres_type(name = "vpc_firewall_rule_status", schema = "public"))] pub struct VpcFirewallRuleStatusEnum; #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize)] @@ -34,7 +34,7 @@ NewtypeDeref! { () pub struct VpcFirewallRuleStatus(external::VpcFirewallRuleSta impl_enum_wrapper!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "vpc_firewall_rule_direction"))] + #[diesel(postgres_type(name = "vpc_firewall_rule_direction", schema = "public"))] pub struct VpcFirewallRuleDirectionEnum; #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize)] @@ -49,7 +49,7 @@ NewtypeDeref! 
{ () pub struct VpcFirewallRuleDirection(external::VpcFirewallRule impl_enum_wrapper!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "vpc_firewall_rule_action"))] + #[diesel(postgres_type(name = "vpc_firewall_rule_action", schema = "public"))] pub struct VpcFirewallRuleActionEnum; #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize)] @@ -64,7 +64,7 @@ NewtypeDeref! { () pub struct VpcFirewallRuleAction(external::VpcFirewallRuleAct impl_enum_wrapper!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "vpc_firewall_rule_protocol"))] + #[diesel(postgres_type(name = "vpc_firewall_rule_protocol", schema = "public"))] pub struct VpcFirewallRuleProtocolEnum; #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize)] diff --git a/nexus/db-model/src/vpc_route.rs b/nexus/db-model/src/vpc_route.rs index 7f68f81254..168ed41cef 100644 --- a/nexus/db-model/src/vpc_route.rs +++ b/nexus/db-model/src/vpc_route.rs @@ -19,7 +19,7 @@ use uuid::Uuid; impl_enum_wrapper!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "router_route_kind"))] + #[diesel(postgres_type(name = "router_route_kind", schema = "public"))] pub struct RouterRouteKindEnum; #[derive(Clone, Debug, AsExpression, FromSqlRow)] diff --git a/nexus/db-model/src/vpc_router.rs b/nexus/db-model/src/vpc_router.rs index 676bc17ec4..71c753e6aa 100644 --- a/nexus/db-model/src/vpc_router.rs +++ b/nexus/db-model/src/vpc_router.rs @@ -14,7 +14,7 @@ use uuid::Uuid; impl_enum_type!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "vpc_router_kind"))] + #[diesel(postgres_type(name = "vpc_router_kind", schema = "public"))] pub struct VpcRouterKindEnum; #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index d5320be733..cae42a0944 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -72,6 +72,7 @@ omicron-test-utils.workspace = true openapiv3.workspace = true pem.workspace = true petgraph.workspace = true +pretty_assertions.workspace = true rcgen.workspace = true regex.workspace = true rustls.workspace = true diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index 924eab363f..e21ba2e3a8 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -24,6 +24,7 @@ pub mod lookup; // Public for doctests. pub mod pagination; mod pool; +mod pool_connection; // This is marked public because the error types are used elsewhere, e.g., in // sagas. pub mod queries; diff --git a/nexus/db-queries/src/db/pool.rs b/nexus/db-queries/src/db/pool.rs index 249852d832..497c8d97c5 100644 --- a/nexus/db-queries/src/db/pool.rs +++ b/nexus/db-queries/src/db/pool.rs @@ -25,16 +25,10 @@ // TODO-design Need TLS support (the types below hardcode NoTls). use super::Config as DbConfig; -use async_bb8_diesel::AsyncSimpleConnection; -use async_bb8_diesel::Connection; use async_bb8_diesel::ConnectionError; use async_bb8_diesel::ConnectionManager; -use async_trait::async_trait; -use bb8::CustomizeConnection; -use diesel::PgConnection; -use diesel_dtrace::DTraceConnection; -pub type DbConnection = DTraceConnection; +pub use super::pool_connection::DbConnection; /// Wrapper around a database connection pool. 
/// @@ -76,7 +70,9 @@ impl Pool { let error_sink = LoggingErrorSink::new(log); let manager = ConnectionManager::::new(url); let pool = builder - .connection_customizer(Box::new(DisallowFullTableScans {})) + .connection_customizer(Box::new( + super::pool_connection::ConnectionCustomizer::new(), + )) .error_sink(Box::new(error_sink)) .build_unchecked(manager); Pool { pool } @@ -88,25 +84,6 @@ impl Pool { } } -const DISALLOW_FULL_TABLE_SCAN_SQL: &str = - "set disallow_full_table_scans = on; set large_full_scan_rows = 0;"; - -#[derive(Debug)] -struct DisallowFullTableScans {} -#[async_trait] -impl CustomizeConnection, ConnectionError> - for DisallowFullTableScans -{ - async fn on_acquire( - &self, - conn: &mut Connection, - ) -> Result<(), ConnectionError> { - conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL) - .await - .map_err(|e| e.into()) - } -} - #[derive(Clone, Debug)] struct LoggingErrorSink { log: slog::Logger, diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs new file mode 100644 index 0000000000..e96a15894d --- /dev/null +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -0,0 +1,303 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Customization that happens on each connection as they're acquired. + +use async_bb8_diesel::AsyncConnection; +use async_bb8_diesel::AsyncRunQueryDsl; +use async_bb8_diesel::AsyncSimpleConnection; +use async_bb8_diesel::Connection; +use async_bb8_diesel::ConnectionError; +use async_trait::async_trait; +use bb8::CustomizeConnection; +use diesel::pg::GetPgMetadataCache; +use diesel::pg::PgMetadataCacheKey; +use diesel::prelude::*; +use diesel::PgConnection; +use diesel_dtrace::DTraceConnection; +use std::collections::HashMap; +use tokio::sync::Mutex; + +pub type DbConnection = DTraceConnection; + +// This is a list of all user-defined types (ENUMS) in the current DB schema. +// +// Diesel looks up user-defined types as they are encountered, and loads +// them into a metadata cache. Although this cost is amortized over the lifetime +// of a connection, this can be slower than desired: +// - Diesel issues a round-trip database call on each user-defined type +// - The cache of OIDs for user-defined types is "per-connection", so when +// using a connection pool, we redo all these calls for new connections. +// +// To mitigate: We look up a list of user-defined types here on first access +// to the connection, and pre-populate the cache. Furthermore, we save this +// information and use it to populate other connections too, without incurring +// another database lookup. +// +// See https://github.com/oxidecomputer/omicron/issues/4733 for more context. 
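To make the comment above concrete (numbers are illustrative, not from the PR): the list below names 36 user-defined types. Without pre-population, a pool that opens C connections can issue up to 36 × C synchronous round trips as each connection first encounters each type; for a 10-connection pool that is 360 queries. With the shared cache, the single 36-row lookup runs once per observed CockroachDB version.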
+static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ + "address_lot_kind", + "authentication_mode", + "block_size", + "caboose_which", + "dataset_kind", + "dns_group", + "hw_power_state", + "hw_rot_slot", + "identity_type", + "instance_state", + "ip_kind", + "network_interface_kind", + "physical_disk_kind", + "producer_kind", + "provider_type", + "root_of_trust_page_which", + "router_route_kind", + "saga_state", + "service_kind", + "sled_provision_state", + "sled_resource_kind", + "snapshot_state", + "sp_type", + "switch_interface_kind", + "switch_link_fec", + "switch_link_speed", + "switch_port_geometry", + "update_artifact_kind", + "update_status", + "updateable_component_type", + "user_provision_type", + "vpc_firewall_rule_action", + "vpc_firewall_rule_direction", + "vpc_firewall_rule_protocol", + "vpc_firewall_rule_status", + "vpc_router_kind", +]; +const CUSTOM_TYPE_SCHEMA: &'static str = "public"; + +const DISALLOW_FULL_TABLE_SCAN_SQL: &str = + "set disallow_full_table_scans = on; set large_full_scan_rows = 0;"; + +#[derive(Debug)] +struct OIDCache(HashMap<PgMetadataCacheKey<'static>, (u32, u32)>); + +impl OIDCache { + // Populate a new OID cache by pre-filling values + async fn new( + conn: &mut Connection<DbConnection>, + ) -> Result<Self, ConnectionError> { + // Lookup all the OIDs for custom types. + // + // As a reminder, this is an optimization: + // - If we supply a value in CUSTOM_TYPE_KEYS that does not + // exist in the schema, the corresponding row won't be + // found, so the value will be ignored. + // - If we don't supply a value in CUSTOM_TYPE_KEYS, even + // though it DOES exist in the schema, it'll likewise not + // get pre-populated into the cache. Diesel would observe + // the cache miss, and perform the lookup later. + let results: Vec<PgTypeMetadata> = pg_type::table + .select((pg_type::typname, pg_type::oid, pg_type::typarray)) + .inner_join( + pg_namespace::table + .on(pg_type::typnamespace.eq(pg_namespace::oid)), + ) + .filter(pg_type::typname.eq_any(CUSTOM_TYPE_KEYS)) + .filter(pg_namespace::nspname.eq(CUSTOM_TYPE_SCHEMA)) + .load_async(&*conn) + .await?; + + // Convert the OIDs into a ("Cache Key", "OID Tuple") pair, + // and store the result in a HashMap. + // + // We'll iterate over this HashMap to pre-populate the connection-local cache for all + // future connections, including this one. + Ok::<_, ConnectionError>(Self(HashMap::from_iter( + results.into_iter().map( + |PgTypeMetadata { typname, oid, array_oid }| { + ( + PgMetadataCacheKey::new( + Some(CUSTOM_TYPE_SCHEMA.into()), + std::borrow::Cow::Owned(typname), + ), + (oid, array_oid), + ) + }, + ), + ))) + } +} + +// String-based representation of the CockroachDB version. +// +// We currently do minimal parsing of this value, but it should +// be distinct between different revisions of CockroachDB. +// This version includes the semver version of the DB, but also +// build and target information. +#[derive(Debug, Eq, PartialEq, Hash)] +struct CockroachVersion(String); + +impl CockroachVersion { + async fn new( + conn: &Connection<DbConnection>, + ) -> Result<Self, ConnectionError> { + diesel::sql_function!(fn version() -> Text); + + let version = + diesel::select(version()).get_result_async::<String>(conn).await?; + Ok(Self(version)) + } +} + +/// A customizer for all new connections made to CockroachDB, from Diesel.
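As the pool.rs hunk earlier in this patch shows, the customizer defined next is what gets installed into the bb8 pool; roughly (builder and manager setup elided):

```rust
// Sketch based on the pool.rs change above, not a complete function.
let pool = builder
    .connection_customizer(Box::new(ConnectionCustomizer::new()))
    .error_sink(Box::new(error_sink))
    .build_unchecked(manager);
// Every `on_acquire` then pre-populates the Diesel metadata cache and
// applies the full-table-scan guard before the connection is handed out.
```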
+#[derive(Debug)] +pub(crate) struct ConnectionCustomizer { + oid_caches: Mutex<HashMap<CockroachVersion, OIDCache>>, +} + +impl ConnectionCustomizer { + pub(crate) fn new() -> Self { + Self { oid_caches: Mutex::new(HashMap::new()) } + } + + async fn populate_metadata_cache( + &self, + conn: &mut Connection<DbConnection>, + ) -> Result<(), ConnectionError> { + // Look up the CockroachDB version for new connections, to ensure + // that OID caches are distinct between different CRDB versions. + // + // This step is performed out of an abundance of caution: OIDs are not + // necessarily stable across major releases of CRDB, and this ensures + // that the OID lookups on custom types do not cross this version + // boundary. + let version = CockroachVersion::new(conn).await?; + + // Lookup the OID cache, or populate it if we haven't previously + // established a connection to this database version. + let mut oid_caches = self.oid_caches.lock().await; + let entry = oid_caches.entry(version); + use std::collections::hash_map::Entry::*; + let oid_cache = match entry { + Occupied(ref entry) => entry.get(), + Vacant(entry) => entry.insert(OIDCache::new(conn).await?), + }; + + // Copy the OID cache into this specific connection. + // + // NOTE: I don't love that this is blocking (due to "as_sync_conn"), but the + // "get_metadata_cache" method does not seem implemented for types that could have a + // non-Postgres backend. + let mut sync_conn = conn.as_sync_conn(); + let cache = sync_conn.get_metadata_cache(); + for (k, v) in &oid_cache.0 { + cache.store_type(k.clone(), *v); + } + Ok(()) + } + + async fn disallow_full_table_scans( + &self, + conn: &mut Connection<DbConnection>, + ) -> Result<(), ConnectionError> { + conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL).await?; + Ok(()) + } +} + +#[async_trait] +impl CustomizeConnection<Connection<DbConnection>, ConnectionError> + for ConnectionCustomizer +{ + async fn on_acquire( + &self, + conn: &mut Connection<DbConnection>, + ) -> Result<(), ConnectionError> { + self.populate_metadata_cache(conn).await?; + self.disallow_full_table_scans(conn).await?; + Ok(()) + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, Queryable)] +pub struct PgTypeMetadata { + typname: String, + oid: u32, + array_oid: u32, +} + +table! { + pg_type (oid) { + oid -> Oid, + typname -> Text, + typarray -> Oid, + typnamespace -> Oid, + } +} + +table! { + pg_namespace (oid) { + oid -> Oid, + nspname -> Text, + } +} + +allow_tables_to_appear_in_same_query!(pg_type, pg_namespace); + +#[cfg(test)] +mod test { + use super::*; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + + // Ensure that the "CUSTOM_TYPE_KEYS" values match the enums + // we find within the database. + // + // If the two are out-of-sync, identify the values causing problems.
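The `table!` definitions above are hand-written shims over `pg_type` and `pg_namespace` so that `OIDCache::new` can express its join in Diesel; the SQL it corresponds to is roughly (sketch, trimmed to two type names):

```rust
// For readers who want to check the lookup by hand against CRDB.
const OID_LOOKUP_SQL: &str = "
    SELECT t.typname, t.oid, t.typarray
    FROM pg_type AS t
    INNER JOIN pg_namespace AS n ON t.typnamespace = n.oid
    WHERE t.typname IN ('instance_state', 'ip_kind')
      AND n.nspname = 'public';
";
```

The test that follows keeps `CUSTOM_TYPE_KEYS` honest against the deployed schema.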
+    #[tokio::test]
+    async fn all_enums_in_prepopulate_list() {
+        let logctx = dev::test_setup_log("all_enums_in_prepopulate_list");
+        let mut crdb = test_setup_database(&logctx.log).await;
+        let client = crdb.connect().await.expect("Failed to connect to CRDB");
+
+        // https://www.cockroachlabs.com/docs/stable/show-enums
+        let rows = client
+            .query("SHOW ENUMS FROM omicron.public;", &[])
+            .await
+            .unwrap_or_else(|_| panic!("failed to list enums"));
+        client.cleanup().await.expect("cleaning up after listing enums");
+
+        let mut observed_public_enums = rows
+            .into_iter()
+            .map(|row| -> String {
+                for i in 0..row.len() {
+                    if row.columns()[i].name() == "name" {
+                        return row.get(i);
+                    }
+                }
+                panic!("Missing 'name' in row: {row:?}");
+            })
+            .collect::<Vec<String>>();
+        observed_public_enums.sort();
+
+        let mut expected_enums: Vec<String> =
+            CUSTOM_TYPE_KEYS.into_iter().map(|s| s.to_string()).collect();
+        expected_enums.sort();
+
+        pretty_assertions::assert_eq!(
+            observed_public_enums,
+            expected_enums,
+            "Enums did not match.\n\
+            If the type is present on the left, but not the right:\n\
+            \tThe enum is in the DB, but not in CUSTOM_TYPE_KEYS.\n\
+            \tConsider adding it, so we can pre-populate the OID cache.\n\
+            If the type is present on the right, but not the left:\n\
+            \tThe enum is not in the DB, but it is in CUSTOM_TYPE_KEYS.\n\
+            \tConsider removing it, because the type no longer exists"
+        );
+
+        crdb.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}

From 9013ec9db132529d8e7b4cd9016b435a7b464afa Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Wed, 3 Jan 2024 15:23:57 -0800
Subject: [PATCH 132/186] Update Rust crate async-trait to 0.1.77 (#4739)

---
 Cargo.lock                | 122 +++++++++++++++++++-------------------
 Cargo.toml                |   2 +-
 workspace-hack/Cargo.toml |   8 +--
 3 files changed, 66 insertions(+), 66 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 440e2fe296..7473c752a0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -168,7 +168,7 @@ dependencies = [
  "omicron-workspace-hack",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -275,7 +275,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -297,18 +297,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
 name = "async-trait"
-version = "0.1.74"
+version = "0.1.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
+checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -359,7 +359,7 @@ dependencies = [
  "quote",
  "serde",
  "serde_tokenstream 0.2.0",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -496,7 +496,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.32",
+ "syn 2.0.46",
  "which",
 ]

@@ -1001,7 +1001,7 @@ dependencies = [
  "heck 0.4.1",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1443,7 +1443,7 @@ checksum = "83fdaf97f4804dcebfa5862639bc9ce4121e82140bec2a987ac5140294865b5b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1491,7 +1491,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "strsim 0.10.0",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1513,7 +1513,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
 dependencies = [
  "darling_core 0.20.3",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1545,7 +1545,7 @@ dependencies = [
  "quote",
  "serde",
  "serde_tokenstream 0.2.0",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1597,7 +1597,7 @@ dependencies = [
  "proc-macro-error",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1630,7 +1630,7 @@ checksum = "5fe87ce4529967e0ba1dcf8450bab64d97dfd5010a6256187ffe2e43e6f0e049"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1650,7 +1650,7 @@ checksum = "48d9b1fc2a6d7e19c89e706a3769e31ee862ac7a4c810c7c0ff3910e1a42a4ce"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1737,7 +1737,7 @@ dependencies = [
  "diesel_table_macro_syntax",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1746,7 +1746,7 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5"
 dependencies = [
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -1989,7 +1989,7 @@ dependencies = [
  "quote",
  "serde",
  "serde_tokenstream 0.2.0",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -2355,7 +2355,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -2472,7 +2472,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -3600,7 +3600,7 @@ version = "0.1.0"
 source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4"
 dependencies = [
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -4003,7 +4003,7 @@ dependencies = [
  "cfg-if",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -4299,7 +4299,7 @@ version = "0.1.0"
 dependencies = [
  "omicron-workspace-hack",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -4450,7 +4450,7 @@ checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -5163,7 +5163,7 @@ dependencies = [
  "string_cache",
  "subtle",
  "syn 1.0.109",
- "syn 2.0.32",
+ "syn 2.0.46",
  "time",
  "time-macros",
  "tokio",
@@ -5276,7 +5276,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -5559,7 +5559,7 @@ dependencies = [
  "omicron-workspace-hack",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -5699,7 +5699,7 @@ dependencies = [
  "regex",
  "regex-syntax 0.7.5",
  "structmeta",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -5839,7 +5839,7 @@ dependencies = [
  "pest_meta",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -5909,7 +5909,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -6154,7 +6154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
 dependencies = [
  "proc-macro2",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -6202,9 +6202,9 @@ dependencies = [

 [[package]]
 name = "proc-macro2"
-version = "1.0.69"
+version = "1.0.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
+checksum = "2de98502f212cfcea8d0bb305bd0f49d7ebdd75b64ba0a68f937d888f4e0d6db"
 dependencies = [
  "unicode-ident",
 ]
@@ -6250,7 +6250,7 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
- "syn 2.0.32",
+ "syn 2.0.46",
  "thiserror",
  "typify",
  "unicode-ident",
@@ -6270,7 +6270,7 @@ dependencies = [
  "serde_json",
  "serde_tokenstream 0.2.0",
  "serde_yaml",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -6371,9 +6371,9 @@ dependencies = [

 [[package]]
 name = "quote"
-version = "1.0.33"
+version = "1.0.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
 dependencies = [
  "proc-macro2",
 ]
@@ -6646,7 +6646,7 @@ checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -6893,7 +6893,7 @@ dependencies = [
  "regex",
  "relative-path",
  "rustc_version 0.4.0",
- "syn 2.0.32",
+ "syn 2.0.46",
  "unicode-ident",
 ]

@@ -7412,7 +7412,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -7473,7 +7473,7 @@ checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -7505,7 +7505,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "serde",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -7546,7 +7546,7 @@ dependencies = [
  "darling 0.20.3",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -7861,7 +7861,7 @@ source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f6
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -8213,7 +8213,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "structmeta-derive",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -8224,7 +8224,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -8283,7 +8283,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -8331,9 +8331,9 @@ dependencies = [

 [[package]]
 name = "syn"
-version = "2.0.32"
+version = "2.0.46"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2"
+checksum = "89456b690ff72fddcecf231caedbe615c59480c93358a93dfae7fc29e3ebbf0e"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -8515,7 +8515,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "structmeta",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -8555,7 +8555,7 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -8742,7 +8742,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -8976,7 +8976,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -9221,7 +9221,7 @@ dependencies = [
  "regress",
  "schemars",
  "serde_json",
- "syn 2.0.32",
+ "syn 2.0.46",
  "thiserror",
  "unicode-ident",
 ]
@@ -9237,7 +9237,7 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_tokenstream 0.2.0",
- "syn 2.0.32",
+ "syn 2.0.46",
  "typify-impl",
 ]

@@ -9598,7 +9598,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
  "wasm-bindgen-shared",
 ]

@@ -9632,7 +9632,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]

@@ -10240,7 +10240,7 @@ checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -10251,7 +10251,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
@@ -10271,7 +10271,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.32",
+ "syn 2.0.46",
 ]

 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index f7256ce8b4..fc2aa53465 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -145,7 +145,7 @@ approx = "0.5.1"
 assert_matches = "1.5.0"
 assert_cmd = "2.0.12"
 async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" }
-async-trait = "0.1.74"
+async-trait = "0.1.77"
 atomicwrites = "0.4.2"
 authz-macros = { path = "nexus/authz-macros" }
 backoff = { version = "0.4.0", features = [ "tokio" ] }
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 653e8b370a..4ff9d05374 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -77,7 +77,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] }
 postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
 ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] }
 predicates = { version = "3.0.4" }
-proc-macro2 = { version = "1.0.69" }
+proc-macro2 = { version = "1.0.74" }
 rand = { version = "0.8.5" }
 rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] }
 regex = { version = "1.10.2" }
@@ -97,7 +97,7 @@ spin = { version = "0.9.8" }
 string_cache = { version = "0.8.7" }
 subtle = { version = "2.5.0" }
 syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] }
-syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] }
+syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] }
 time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] }
 tokio = { version = "1.35.0", features = ["full", "test-util"] }
 tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
@@ -180,7 +180,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] }
 postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
 ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] }
 predicates = { version = "3.0.4" }
-proc-macro2 = { version = "1.0.69" }
+proc-macro2 = { version = "1.0.74" }
 rand = { version = "0.8.5" }
 rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] }
 regex = { version = "1.10.2" }
@@ -200,7 +200,7 @@ spin = { version = "0.9.8" }
 string_cache = { version = "0.8.7" }
 subtle = { version = "2.5.0" }
 syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] }
-syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] }
+syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] }
 time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] }
 time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] }
 tokio = { version = "1.35.0", features = ["full", "test-util"] }

From d53cc0d46ec3cdc561b0b107f12da1a6fd3024d5 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Wed, 3 Jan 2024 15:24:20 -0800
Subject: [PATCH 133/186] Update Rust crate hkdf to 0.12.4 (#4692)

---
 Cargo.lock | 4 ++--
 Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7473c752a0..71d72d34b6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2892,9 +2892,9 @@ checksum = "b4ba82c000837f4e74df01a5520f0dc48735d4aed955a99eae4428bab7cf3acd"

 [[package]]
 name = "hkdf"
-version = "0.12.3"
+version = "0.12.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "791a029f6b9fc27657f6f188ec6e5e43f6911f6f878e0dc5501396e09809d437"
+checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7"
 dependencies = [
  "hmac",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index fc2aa53465..ad13ae998b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -208,7 +208,7 @@ heck = "0.4"
 hex = "0.4.3"
 hex-literal = "0.4.1"
 highway = "1.1.0"
-hkdf = "0.12.3"
+hkdf = "0.12.4"
 http = "0.2.11"
 httptest = "0.15.5"
 hubtools = { git = "https://github.com/oxidecomputer/hubtools.git", branch = "main" }

From 623f27ea79c7a727f2bc0b658e1ed7d75f147b52 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 3 Jan 2024 15:24:40 -0800
Subject: [PATCH 134/186] Bump unsafe-libyaml from 0.2.9 to 0.2.10 (#4726)

---
 Cargo.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 71d72d34b6..4a7fbbba60 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9325,9 +9325,9 @@ dependencies = [

 [[package]]
 name = "unsafe-libyaml"
-version = "0.2.9"
+version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa"
+checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b"

 [[package]]
 name = "untrusted"

From 1666711959673df2d9c0a95e511f652e296ab919 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Wed, 3 Jan 2024 15:25:19 -0800
Subject: [PATCH 135/186] Update Rust crate sqlformat to 0.2.3 (#4685)

---
 oximeter/db/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml
index 99985a3b80..c4ee44acb6 100644
--- a/oximeter/db/Cargo.toml
+++ b/oximeter/db/Cargo.toml
@@ -28,7 +28,7 @@ slog.workspace = true
 slog-async.workspace = true
 slog-term.workspace = true
 sqlparser.workspace = true
-sqlformat = "0.2.2"
+sqlformat = "0.2.3"
 tabled.workspace = true
 thiserror.workspace = true
 usdt.workspace = true

From 136ad11e96bf432e9d11011cc3aa6f6b0a209a92 Mon Sep 17 00:00:00 2001
From: Rain
Date: Thu, 4 Jan 2024 11:23:07 -0800
Subject: [PATCH 136/186] [wicketd] move shared update-related code into a new
 crate (#4659)

Introduce a new crate called "update-common" which consists of
update-related code shared between Nexus and wicketd. This sits above
tufaceous-lib and below wicketd (and soon Nexus).
---
 Cargo.lock                                    | 30 ++++++++++++
 Cargo.toml                                    |  3 ++
 update-common/Cargo.toml                      | 32 ++++++++++++
 update-common/src/artifacts/artifact_types.rs | 31 ++++++++++++
 .../src/artifacts/artifacts_with_plan.rs      | 22 ++++-----
 .../src/artifacts/extracted_artifacts.rs      | 22 ++++-----
 update-common/src/artifacts/mod.rs            | 15 ++++++
 .../src/artifacts/update_plan.rs              | 49 +++++++++----------
 .../error.rs => update-common/src/errors.rs   |  6 ++-
 update-common/src/lib.rs                      |  8 +++
 wicketd/Cargo.toml                            |  1 +
 wicketd/src/artifacts.rs                      | 29 -----------
 wicketd/src/artifacts/store.rs                |  6 +--
 wicketd/src/update_tracker.rs                 |  4 +-
 14 files changed, 173 insertions(+), 85 deletions(-)
 create mode 100644 update-common/Cargo.toml
 create mode 100644 update-common/src/artifacts/artifact_types.rs
 rename {wicketd => update-common}/src/artifacts/artifacts_with_plan.rs (96%)
 rename {wicketd => update-common}/src/artifacts/extracted_artifacts.rs (95%)
 create mode 100644 update-common/src/artifacts/mod.rs
 rename {wicketd => update-common}/src/artifacts/update_plan.rs (97%)
 rename wicketd/src/artifacts/error.rs => update-common/src/errors.rs (98%)
 create mode 100644 update-common/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 4a7fbbba60..738d046235 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9341,6 +9341,35 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"

+[[package]]
+name = "update-common"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "bytes",
+ "camino",
+ "camino-tempfile",
+ "clap 4.4.3",
+ "debug-ignore",
+ "display-error-chain",
+ "dropshot",
+ "futures",
+ "hex",
+ "hubtools",
+ "omicron-common",
+ "omicron-test-utils",
+ "omicron-workspace-hack",
+ "rand 0.8.5",
+ "sha2",
+ "slog",
+ "thiserror",
+ "tokio",
+ "tokio-util",
+ "tough",
+ "tufaceous",
+ "tufaceous-lib",
+]
+
 [[package]]
 name = "update-engine"
 version = "0.1.0"
@@ -9844,6 +9873,7 @@ dependencies = [
  "trust-dns-resolver",
  "tufaceous",
  "tufaceous-lib",
+ "update-common",
  "update-engine",
  "uuid",
  "wicket",
diff --git a/Cargo.toml b/Cargo.toml
index ad13ae998b..f96d0f2914 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,6 +62,7 @@ members = [
     "test-utils",
     "tufaceous-lib",
     "tufaceous",
+    "update-common",
     "update-engine",
     "wicket-common",
     "wicket-dbg",
@@ -130,6 +131,7 @@ default-members = [
     "test-utils",
     "tufaceous-lib",
     "tufaceous",
+    "update-common",
     "update-engine",
     "wicket-common",
     "wicket-dbg",
@@ -386,6 +388,7 @@ trybuild = "1.0.85"
 tufaceous = { path = "tufaceous" }
 tufaceous-lib = { path = "tufaceous-lib" }
 unicode-width = "0.1.11"
+update-common = { path = "update-common" }
 update-engine = { path = "update-engine" }
 usdt = "0.3"
 uuid = { version = "1.6.1", features = ["serde", "v4"] }
diff --git a/update-common/Cargo.toml b/update-common/Cargo.toml
new file mode 100644
index 0000000000..cc2ee86232
--- /dev/null
+++ b/update-common/Cargo.toml
@@ -0,0 +1,32 @@
+[package]
+name = "update-common"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[dependencies]
+anyhow.workspace = true
+bytes.workspace = true
+camino.workspace = true
+camino-tempfile.workspace = true
+debug-ignore.workspace = true
+display-error-chain.workspace = true
+dropshot.workspace = true
+futures.workspace = true
+hex.workspace = true
+hubtools.workspace = true
+omicron-common.workspace = true
+sha2.workspace = true
+slog.workspace = true
+thiserror.workspace = true
+tokio.workspace = true
+tokio-util.workspace = true
+tough.workspace = true
+tufaceous-lib.workspace = true
+omicron-workspace-hack.workspace = true
+
+[dev-dependencies]
+clap.workspace = true
+omicron-test-utils.workspace = true
+rand.workspace = true
+tufaceous.workspace = true
diff --git a/update-common/src/artifacts/artifact_types.rs b/update-common/src/artifacts/artifact_types.rs
new file mode 100644
index 0000000000..e70970993a
--- /dev/null
+++ b/update-common/src/artifacts/artifact_types.rs
@@ -0,0 +1,31 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! General types for artifacts that don't quite fit into the other modules.
+
+use std::borrow::Borrow;
+
+use omicron_common::update::ArtifactId;
+
+use super::ExtractedArtifactDataHandle;
+
+/// A pair containing both the ID of an artifact and a handle to its data.
+///
+/// Note that cloning an `ArtifactIdData` will clone the handle, which has
+/// implications on temporary directory cleanup. See
+/// [`ExtractedArtifactDataHandle`] for details.
+#[derive(Debug, Clone)]
+pub struct ArtifactIdData {
+    pub id: ArtifactId,
+    pub data: ExtractedArtifactDataHandle,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Board(pub String);
+
+impl Borrow<String> for Board {
+    fn borrow(&self) -> &String {
+        &self.0
+    }
+}
diff --git a/wicketd/src/artifacts/artifacts_with_plan.rs b/update-common/src/artifacts/artifacts_with_plan.rs
similarity index 96%
rename from wicketd/src/artifacts/artifacts_with_plan.rs
rename to update-common/src/artifacts/artifacts_with_plan.rs
index d3319d7f6b..94c7294d48 100644
--- a/wicketd/src/artifacts/artifacts_with_plan.rs
+++ b/update-common/src/artifacts/artifacts_with_plan.rs
@@ -2,10 +2,10 @@
-use super::error::RepositoryError;
-use super::update_plan::UpdatePlanBuilder;
 use super::ExtractedArtifactDataHandle;
 use super::UpdatePlan;
+use super::UpdatePlanBuilder;
+use crate::errors::RepositoryError;
 use camino_tempfile::Utf8TempDir;
 use debug_ignore::DebugIgnore;
 use omicron_common::update::ArtifactHash;
@@ -22,7 +22,7 @@ use tufaceous_lib::OmicronRepo;

 /// A collection of artifacts along with an update plan using those artifacts.
 #[derive(Debug)]
-pub(super) struct ArtifactsWithPlan {
+pub struct ArtifactsWithPlan {
     // Map of top-level artifact IDs (present in the TUF repo) to the actual
     // artifacts we're serving (e.g., a top-level RoT artifact will map to two
     // artifact hashes: one for each of the A and B images).
@@ -50,7 +50,7 @@ pub(super) struct ArtifactsWithPlan {
 }

 impl ArtifactsWithPlan {
-    pub(super) async fn from_zip<T>(
+    pub async fn from_zip<T>(
         zip_data: T,
         log: &Logger,
     ) -> Result<Self, RepositoryError>
@@ -81,7 +81,7 @@ impl ArtifactsWithPlan {
         // these are just direct copies of artifacts we just unpacked into
         // `dir`, but we'll also unpack nested artifacts like the RoT dual A/B
         // archives.
-        let mut plan_builder =
+        let mut builder =
             UpdatePlanBuilder::new(artifacts.system_version, log)?;

         // Make a pass through each artifact in the repo. For each artifact, we
@@ -146,7 +146,7 @@ impl ArtifactsWithPlan {
                 RepositoryError::MissingTarget(artifact.target.clone())
             })?;

-            plan_builder
+            builder
                 .add_artifact(
                     artifact.into_id(),
                     artifact_hash,
@@ -159,12 +159,12 @@ impl ArtifactsWithPlan {

         // Ensure we know how to apply updates from this set of artifacts; we'll
         // remember the plan we create.
-        let plan = plan_builder.build()?;
+        let artifacts = builder.build()?;

-        Ok(Self { by_id, by_hash: by_hash.into(), plan })
+        Ok(Self { by_id, by_hash: by_hash.into(), plan: artifacts })
     }

-    pub(super) fn by_id(&self) -> &BTreeMap<ArtifactId, Vec<ArtifactHashId>> {
+    pub fn by_id(&self) -> &BTreeMap<ArtifactId, Vec<ArtifactHashId>> {
         &self.by_id
     }

@@ -175,11 +175,11 @@ impl ArtifactsWithPlan {
         &self.by_hash
     }

-    pub(super) fn plan(&self) -> &UpdatePlan {
+    pub fn plan(&self) -> &UpdatePlan {
         &self.plan
     }

-    pub(super) fn get_by_hash(
+    pub fn get_by_hash(
         &self,
         id: &ArtifactHashId,
     ) -> Option<ExtractedArtifactDataHandle> {
diff --git a/wicketd/src/artifacts/extracted_artifacts.rs b/update-common/src/artifacts/extracted_artifacts.rs
similarity index 95%
rename from wicketd/src/artifacts/extracted_artifacts.rs
rename to update-common/src/artifacts/extracted_artifacts.rs
index 5683cd1c13..06e0e5ec65 100644
--- a/wicketd/src/artifacts/extracted_artifacts.rs
+++ b/update-common/src/artifacts/extracted_artifacts.rs
@@ -2,7 +2,7 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.

-use super::error::RepositoryError;
+use crate::errors::RepositoryError;
 use anyhow::Context;
 use camino::Utf8PathBuf;
 use camino_tempfile::NamedUtf8TempFile;
@@ -39,7 +39,7 @@ use tokio_util::io::ReaderStream;
 /// contexts where you need the data and need the temporary directory containing
 /// it to stick around.
 #[derive(Debug, Clone)]
-pub(crate) struct ExtractedArtifactDataHandle {
+pub struct ExtractedArtifactDataHandle {
     tempdir: Arc<Utf8TempDir>,
     file_size: usize,
     hash_id: ArtifactHashId,
@@ -61,11 +61,11 @@ impl Eq for ExtractedArtifactDataHandle {}

 impl ExtractedArtifactDataHandle {
     /// File size of this artifact in bytes.
-    pub(crate) fn file_size(&self) -> usize {
+    pub fn file_size(&self) -> usize {
         self.file_size
     }

-    pub(crate) fn hash(&self) -> ArtifactHash {
+    pub fn hash(&self) -> ArtifactHash {
         self.hash_id.hash
     }

@@ -73,7 +73,7 @@ impl ExtractedArtifactDataHandle {
     ///
     /// This can fail due to I/O errors outside our control (e.g., something
     /// removed the contents of our temporary directory).
-    pub(crate) async fn reader_stream(
+    pub async fn reader_stream(
         &self,
     ) -> anyhow::Result<ReaderStream<tokio::fs::File>> {
         let path = path_for_artifact(&self.tempdir, &self.hash_id);
@@ -96,7 +96,7 @@ impl ExtractedArtifactDataHandle {
 /// (e.g., when a new TUF repository is uploaded). The handles can be used to
 /// on-demand read files that were copied into the temp dir during ingest.
 #[derive(Debug)]
-pub(crate) struct ExtractedArtifacts {
+pub struct ExtractedArtifacts {
     // Directory in which we store extracted artifacts. This is currently a
     // single flat directory with files named by artifact hash; we don't expect
     // more than a few dozen files total, so no need to nest directories.
@@ -104,7 +104,7 @@ pub(crate) struct ExtractedArtifacts {
 }

 impl ExtractedArtifacts {
-    pub(super) fn new(log: &Logger) -> Result<Self, RepositoryError> {
+    pub fn new(log: &Logger) -> Result<Self, RepositoryError> {
         let tempdir = camino_tempfile::Builder::new()
             .prefix("wicketd-update-artifacts.")
             .tempdir()
@@ -125,7 +125,7 @@ impl ExtractedArtifacts {

     /// Copy from `stream` into our temp directory, returning a handle to the
     /// extracted artifact on success.
-    pub(super) async fn store(
+    pub async fn store(
         &mut self,
         artifact_hash_id: ArtifactHashId,
         stream: impl Stream<Item = Result<Bytes, HttpError>>,
@@ -185,7 +185,7 @@ impl ExtractedArtifacts {
     /// As the returned file is written to, the data will be hashed; once
     /// writing is complete, call [`ExtractedArtifacts::store_tempfile()`] to
     /// persist the temporary file into an [`ExtractedArtifactDataHandle`].
-    pub(super) fn new_tempfile(
+    pub fn new_tempfile(
        &self,
    ) -> Result<HashingNamedUtf8TempFile, RepositoryError> {
        let file = NamedUtf8TempFile::new_in(self.tempdir.path()).map_err(
@@ -203,7 +203,7 @@ impl ExtractedArtifacts {

     /// Persist a temporary file that was returned by
     /// [`ExtractedArtifacts::new_tempfile()`] as an extracted artifact.
-    pub(super) fn store_tempfile(
+    pub fn store_tempfile(
         &self,
         kind: ArtifactKind,
         file: HashingNamedUtf8TempFile,
@@ -249,7 +249,7 @@ fn path_for_artifact(
 }

 // Wrapper around a `NamedUtf8TempFile` that hashes contents as they're written.
-pub(super) struct HashingNamedUtf8TempFile {
+pub struct HashingNamedUtf8TempFile {
     file: io::BufWriter<NamedUtf8TempFile>,
     hasher: Sha256,
     bytes_written: usize,
diff --git a/update-common/src/artifacts/mod.rs b/update-common/src/artifacts/mod.rs
new file mode 100644
index 0000000000..d68c488599
--- /dev/null
+++ b/update-common/src/artifacts/mod.rs
@@ -0,0 +1,15 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Types to represent update artifacts.
+
+mod artifact_types;
+mod artifacts_with_plan;
+mod extracted_artifacts;
+mod update_plan;
+
+pub use artifact_types::*;
+pub use artifacts_with_plan::*;
+pub use extracted_artifacts::*;
+pub use update_plan::*;
diff --git a/wicketd/src/artifacts/update_plan.rs b/update-common/src/artifacts/update_plan.rs
similarity index 97%
rename from wicketd/src/artifacts/update_plan.rs
rename to update-common/src/artifacts/update_plan.rs
index c6db7c1b65..e30389f646 100644
--- a/wicketd/src/artifacts/update_plan.rs
+++ b/update-common/src/artifacts/update_plan.rs
@@ -8,12 +8,12 @@
 //! apply to which components; the ordering and application of the plan lives
 //! elsewhere.

-use super::error::RepositoryError;
-use super::extracted_artifacts::ExtractedArtifacts;
-use super::extracted_artifacts::HashingNamedUtf8TempFile;
 use super::ArtifactIdData;
 use super::Board;
 use super::ExtractedArtifactDataHandle;
+use super::ExtractedArtifacts;
+use super::HashingNamedUtf8TempFile;
+use crate::errors::RepositoryError;
 use bytes::Bytes;
 use futures::Stream;
 use futures::StreamExt;
@@ -34,21 +34,20 @@ use std::io;
 use tufaceous_lib::HostPhaseImages;
 use tufaceous_lib::RotArchives;

-/// The update plan currently in effect.
-///
-/// Exposed for testing.
+/// Artifacts with their hashes and sources, as obtained from an uploaded
+/// repository.
 #[derive(Debug, Clone)]
 pub struct UpdatePlan {
-    pub(crate) system_version: SemverVersion,
-    pub(crate) gimlet_sp: BTreeMap<Board, ArtifactIdData>,
-    pub(crate) gimlet_rot_a: Vec<ArtifactIdData>,
-    pub(crate) gimlet_rot_b: Vec<ArtifactIdData>,
-    pub(crate) psc_sp: BTreeMap<Board, ArtifactIdData>,
-    pub(crate) psc_rot_a: Vec<ArtifactIdData>,
-    pub(crate) psc_rot_b: Vec<ArtifactIdData>,
-    pub(crate) sidecar_sp: BTreeMap<Board, ArtifactIdData>,
-    pub(crate) sidecar_rot_a: Vec<ArtifactIdData>,
-    pub(crate) sidecar_rot_b: Vec<ArtifactIdData>,
+    pub system_version: SemverVersion,
+    pub gimlet_sp: BTreeMap<Board, ArtifactIdData>,
+    pub gimlet_rot_a: Vec<ArtifactIdData>,
+    pub gimlet_rot_b: Vec<ArtifactIdData>,
+    pub psc_sp: BTreeMap<Board, ArtifactIdData>,
+    pub psc_rot_a: Vec<ArtifactIdData>,
+    pub psc_rot_b: Vec<ArtifactIdData>,
+    pub sidecar_sp: BTreeMap<Board, ArtifactIdData>,
+    pub sidecar_rot_a: Vec<ArtifactIdData>,
+    pub sidecar_rot_b: Vec<ArtifactIdData>,

     // Note: The Trampoline image is broken into phase1/phase2 as part of our
     // update plan (because they go to different destinations), but the two
@@ -58,21 +57,17 @@ pub struct UpdatePlan {
     // The same would apply to the host phase1/phase2, but we don't actually
     // need the `host_phase_2` data as part of this plan (we serve it from the
     // artifact server instead).
-    pub(crate) host_phase_1: ArtifactIdData,
-    pub(crate) trampoline_phase_1: ArtifactIdData,
-    pub(crate) trampoline_phase_2: ArtifactIdData,
+    pub host_phase_1: ArtifactIdData,
+    pub trampoline_phase_1: ArtifactIdData,
+    pub trampoline_phase_2: ArtifactIdData,

     // We need to send installinator the hash of the host_phase_2 data it should
     // fetch from us; we compute it while generating the plan.
-    //
-    // This is exposed for testing.
     pub host_phase_2_hash: ArtifactHash,

     // We also need to send installinator the hash of the control_plane image it
     // should fetch from us. This is already present in the TUF repository, but
     // we record it here for use by the update process.
-    //
-    // This is exposed for testing.
     pub control_plane_hash: ArtifactHash,
 }

@@ -81,7 +76,7 @@ pub struct UpdatePlan {
 /// [`UpdatePlanBuilder::build()`] will (fallibly) convert from the builder to
 /// the final plan.
 #[derive(Debug)]
-pub(super) struct UpdatePlanBuilder<'a> {
+pub struct UpdatePlanBuilder<'a> {
     // fields that mirror `UpdatePlan`
     system_version: SemverVersion,
     gimlet_sp: BTreeMap<Board, ArtifactIdData>,
@@ -118,7 +113,7 @@ pub(super) struct UpdatePlanBuilder<'a> {
 }

 impl<'a> UpdatePlanBuilder<'a> {
-    pub(super) fn new(
+    pub fn new(
         system_version: SemverVersion,
         log: &'a Logger,
     ) -> Result<Self, RepositoryError> {
@@ -145,7 +140,7 @@ impl<'a> UpdatePlanBuilder<'a> {
         })
     }

-    pub(super) async fn add_artifact(
+    pub async fn add_artifact(
         &mut self,
         artifact_id: ArtifactId,
         artifact_hash: ArtifactHash,
@@ -665,7 +660,7 @@ impl<'a> UpdatePlanBuilder<'a> {
         Ok((image1, image2))
     }

-    pub(super) fn build(self) -> Result<UpdatePlan, RepositoryError> {
+    pub fn build(self) -> Result<UpdatePlan, RepositoryError> {
         // Ensure our multi-board-supporting kinds have at least one board
         // present.
         for (kind, no_artifacts) in [
diff --git a/wicketd/src/artifacts/error.rs b/update-common/src/errors.rs
similarity index 98%
rename from wicketd/src/artifacts/error.rs
rename to update-common/src/errors.rs
index ada8fbe011..5fba43b944 100644
--- a/wicketd/src/artifacts/error.rs
+++ b/update-common/src/errors.rs
@@ -2,6 +2,8 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.

+//! Error types for this crate.
+
 use camino::Utf8PathBuf;
 use display_error_chain::DisplayErrorChain;
 use dropshot::HttpError;
@@ -12,7 +14,7 @@ use slog::error;
 use thiserror::Error;

 #[derive(Debug, Error)]
-pub(super) enum RepositoryError {
+pub enum RepositoryError {
     #[error("error opening archive")]
     OpenArchive(#[source] anyhow::Error),

@@ -129,7 +131,7 @@ pub(super) enum RepositoryError {
 }

 impl RepositoryError {
-    pub(super) fn to_http_error(&self) -> HttpError {
+    pub fn to_http_error(&self) -> HttpError {
         let message = DisplayErrorChain::new(self).to_string();

         match self {
diff --git a/update-common/src/lib.rs b/update-common/src/lib.rs
new file mode 100644
index 0000000000..b1f0d88484
--- /dev/null
+++ b/update-common/src/lib.rs
@@ -0,0 +1,8 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Common update types and code shared between wicketd and Nexus.
+
+pub mod artifacts;
+pub mod errors;
diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml
index 97550342d0..83e7bf33ca 100644
--- a/wicketd/Cargo.toml
+++ b/wicketd/Cargo.toml
@@ -56,6 +56,7 @@ omicron-common.workspace = true
 omicron-passwords.workspace = true
 sled-hardware.workspace = true
 tufaceous-lib.workspace = true
+update-common.workspace = true
 update-engine.workspace = true
 wicket-common.workspace = true
 wicketd-client.workspace = true
diff --git a/wicketd/src/artifacts.rs b/wicketd/src/artifacts.rs
index 7b55d73dcb..3e5854d17e 100644
--- a/wicketd/src/artifacts.rs
+++ b/wicketd/src/artifacts.rs
@@ -2,37 +2,8 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.

-use omicron_common::update::ArtifactId;
-use std::borrow::Borrow;
-
-mod artifacts_with_plan;
-mod error;
-mod extracted_artifacts;
 mod server;
 mod store;
-mod update_plan;

-pub(crate) use self::extracted_artifacts::ExtractedArtifactDataHandle;
 pub(crate) use self::server::WicketdArtifactServer;
 pub(crate) use self::store::WicketdArtifactStore;
-pub use self::update_plan::UpdatePlan;
-
-/// A pair containing both the ID of an artifact and a handle to its data.
-///
-/// Note that cloning an `ArtifactIdData` will clone the handle, which has
-/// implications on temporary directory cleanup. See
-/// [`ExtractedArtifactDataHandle`] for details.
-#[derive(Debug, Clone)]
-pub(crate) struct ArtifactIdData {
-    pub(crate) id: ArtifactId,
-    pub(crate) data: ExtractedArtifactDataHandle,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct Board(pub(crate) String);
-
-impl Borrow<String> for Board {
-    fn borrow(&self) -> &String {
-        &self.0
-    }
-}
diff --git a/wicketd/src/artifacts/store.rs b/wicketd/src/artifacts/store.rs
index 2a7b4a646b..a5f24993a8 100644
--- a/wicketd/src/artifacts/store.rs
+++ b/wicketd/src/artifacts/store.rs
@@ -2,9 +2,6 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.

-use super::artifacts_with_plan::ArtifactsWithPlan;
-use super::ExtractedArtifactDataHandle;
-use super::UpdatePlan;
 use crate::http_entrypoints::InstallableArtifacts;
 use dropshot::HttpError;
 use omicron_common::api::external::SemverVersion;
@@ -13,6 +10,9 @@ use slog::Logger;
 use std::io;
 use std::sync::Arc;
 use std::sync::Mutex;
+use update_common::artifacts::ArtifactsWithPlan;
+use update_common::artifacts::ExtractedArtifactDataHandle;
+use update_common::artifacts::UpdatePlan;

 /// The artifact store for wicketd.
 ///
diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs
index 336333f899..823a7964de 100644
--- a/wicketd/src/update_tracker.rs
+++ b/wicketd/src/update_tracker.rs
@@ -4,8 +4,6 @@

 // Copyright 2023 Oxide Computer Company

-use crate::artifacts::ArtifactIdData;
-use crate::artifacts::UpdatePlan;
 use crate::artifacts::WicketdArtifactStore;
 use crate::helpers::sps_to_string;
 use crate::http_entrypoints::ClearUpdateStateResponse;
@@ -65,6 +63,8 @@ use tokio::sync::watch;
 use tokio::sync::Mutex;
 use tokio::task::JoinHandle;
 use tokio_util::io::StreamReader;
+use update_common::artifacts::ArtifactIdData;
+use update_common::artifacts::UpdatePlan;
 use update_engine::events::ProgressUnits;
 use update_engine::AbortHandle;
 use update_engine::StepSpec;

From 18d0c43bda66b661d0f321bf7e883c0388106eb7 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 4 Jan 2024 11:24:17 -0800
Subject: [PATCH 137/186] Update Rust crate atomicwrites to 0.4.3 (#4748)

---
 Cargo.lock | 6 +++---
 Cargo.toml | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 738d046235..759eea9143 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -328,13 +328,13 @@ checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3"

 [[package]]
 name = "atomicwrites"
-version = "0.4.2"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4d45f362125ed144544e57b0ec6de8fd6a296d41a6252fc4a20c0cf12e9ed3a"
+checksum = "fc7b2dbe9169059af0f821e811180fddc971fc210c776c133c7819ccd6e478db"
 dependencies = [
  "rustix 0.38.25",
  "tempfile",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]

diff --git a/Cargo.toml b/Cargo.toml
index f96d0f2914..573bffb5b3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -148,7 +148,7 @@ assert_matches = "1.5.0"
 assert_cmd = "2.0.12"
 async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" }
 async-trait = "0.1.77"
-atomicwrites = "0.4.2"
+atomicwrites = "0.4.3"
 authz-macros = { path = "nexus/authz-macros" }
 backoff = { version = "0.4.0", features = [ "tokio" ] }
 base64 = "0.21.5"

From dfd6372b53b05447c86fa91cbb9a4b6d2f6dc8f4 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 4 Jan 2024 11:24:49 -0800
Subject: [PATCH 138/186] Update Rust crate futures to 0.3.30 (#4750)

---
 Cargo.lock                | 36 ++++++++++++++++++------------------
 Cargo.toml                |  2 +-
 workspace-hack/Cargo.toml | 28 ++++++++++++++--------------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 759eea9143..cfbfccb23a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2418,9 +2418,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"

 [[package]]
 name = "futures"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335"
+checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
 dependencies = [
  "futures-channel",
  "futures-core",
@@ -2433,9 +2433,9 @@ dependencies = [

 [[package]]
 name = "futures-channel"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb"
+checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
 dependencies = [
  "futures-core",
  "futures-sink",
@@ -2443,15 +2443,15 @@ dependencies = [

 [[package]]
 name = "futures-core"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c"
+checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"

 [[package]]
 name = "futures-executor"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc"
+checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
 dependencies = [
  "futures-core",
  "futures-task",
@@ -2460,15 +2460,15 @@ dependencies = [

 [[package]]
 name = "futures-io"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa"
+checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"

 [[package]]
 name = "futures-macro"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
+checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2477,15 +2477,15 @@ dependencies = [

 [[package]]
 name = "futures-sink"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817"
+checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"

 [[package]]
 name = "futures-task"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2"
+checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"

 [[package]]
 name = "futures-timer"
@@ -2495,9 +2495,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"

 [[package]]
 name = "futures-util"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104"
+checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
 dependencies = [
  "futures-channel",
  "futures-core",
diff --git a/Cargo.toml b/Cargo.toml
index 573bffb5b3..596e1e4443 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -198,7 +198,7 @@ flate2 = "1.0.28"
 flume = "0.11.0"
 foreign-types = "0.3.2"
 fs-err = "2.11.0"
-futures = "0.3.29"
+futures = "0.3.30"
 gateway-client = { path = "clients/gateway-client" }
 gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", default-features = false, features = ["std"] }
 gateway-sp-comms = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9" }
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 4ff9d05374..e42a95a824 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -41,13 +41,13 @@ either = { version = "1.9.0" }
 elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] }
 ff = { version = "0.13.0", default-features = false, features = ["alloc"] }
 flate2 = { version = "1.0.28" }
-futures = { version = "0.3.29" }
-futures-channel = { version = "0.3.29", features = ["sink"] }
-futures-core = { version = "0.3.29" }
-futures-io = { version = "0.3.29", default-features = false, features = ["std"] }
-futures-sink = { version = "0.3.29" }
-futures-task = { version = "0.3.29", default-features = false, features = ["std"] }
-futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] }
+futures = { version = "0.3.30" }
+futures-channel = { version = "0.3.30", features = ["sink"] }
+futures-core = { version = "0.3.30" }
+futures-io = { version = "0.3.30", default-features = false, features = ["std"] }
+futures-sink = { version = "0.3.30" }
+futures-task = { version = "0.3.30", default-features = false, features = ["std"] }
+futures-util = { version = "0.3.30", features = ["channel", "io", "sink"] }
 gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] }
 generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] }
 getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] }
@@ -144,13 +144,13 @@ either = { version = "1.9.0" }
 elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] }
 ff = { version = "0.13.0", default-features = false, features = ["alloc"] }
 flate2 = { version = "1.0.28" }
-futures = { version = "0.3.29" }
-futures-channel = { version = "0.3.29", features = ["sink"] }
-futures-core = { version = "0.3.29" }
-futures-io = { version = "0.3.29", default-features = false, features = ["std"] }
-futures-sink = { version = "0.3.29" }
-futures-task = { version = "0.3.29", default-features = false, features = ["std"] }
-futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] }
+futures = { version = "0.3.30" }
+futures-channel = { version = "0.3.30", features = ["sink"] }
+futures-core = { version = "0.3.30" }
+futures-io = { version = "0.3.30", default-features = false, features = ["std"] }
+futures-sink = { version = "0.3.30" }
+futures-task = { version = "0.3.30", default-features = false, features = ["std"] }
+futures-util = { version = "0.3.30", features = ["channel", "io", "sink"] }
 gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] }
 generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] }
 getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] }

From 12e6eb2a186fa86eb044c579d068c2a5b52b5c94 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 4 Jan 2024 11:25:10 -0800
Subject: [PATCH 139/186] Update Rust crate derive-where to 1.2.7 (#4749)

---
 Cargo.lock | 4 ++--
 Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index cfbfccb23a..15496909d2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1644,9 +1644,9 @@ dependencies = [

 [[package]]
 name = "derive-where"
-version = "1.2.6"
+version = "1.2.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48d9b1fc2a6d7e19c89e706a3769e31ee862ac7a4c810c7c0ff3910e1a42a4ce"
+checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/Cargo.toml b/Cargo.toml
index 596e1e4443..a9387fac6d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -183,7 +183,7 @@ ddm-admin-client = { path = "clients/ddm-admin-client" }
 db-macros = { path = "nexus/db-macros" }
 debug-ignore = "1.0.5"
 derive_more = "0.99.17"
-derive-where = "1.2.6"
+derive-where = "1.2.7"
 diesel = { version = "2.1.4", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] }
 diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" }
 dns-server = { path = "dns-server" }

From 5a81ef91911df102bc148cf37c80e4c29d77e029 Mon Sep 17 00:00:00 2001
From: David Pacheco
Date: Thu, 4 Jan 2024 13:09:51 -0800
Subject: [PATCH 140/186] add sled agents, Omicron zones to inventory (#4727)

---
 Cargo.lock                                    |   7 +
 clients/sled-agent-client/src/lib.rs          |  67 +-
 common/src/api/external/mod.rs                |   4 +-
 dev-tools/omdb/src/bin/omdb/db.rs             |  53 ++
 nexus/db-model/src/inventory.rs               | 633 +++++++++++++++++-
 nexus/db-model/src/lib.rs                     |   2 +-
 nexus/db-model/src/schema.rs                  |  74 +-
 .../db-queries/src/db/datastore/inventory.rs  | 477 ++++++++++++-
 nexus/db-queries/src/db/pool_connection.rs    |   2 +
 nexus/inventory/Cargo.toml                    |   6 +
 .../inventory/example-data/madrid-sled14.json | 214 ++++++
 .../inventory/example-data/madrid-sled16.json | 206 ++++++
 .../inventory/example-data/madrid-sled17.json | 172 +++++
 nexus/inventory/src/builder.rs                | 174 ++++-
 nexus/inventory/src/collector.rs              | 259 ++++++-
 nexus/inventory/src/examples.rs               | 141 +++-
 nexus/inventory/src/lib.rs                    |   4 +
 nexus/inventory/src/sled_agent_enumerator.rs  |  44 ++
 .../tests/output/collector_basic.txt          |  14 +
 .../tests/output/collector_errors.txt         |   2 +
 .../output/collector_sled_agent_errors.txt    |  80 +++
 .../app/background/inventory_collection.rs    | 138 ++++
 nexus/src/app/sled.rs                         |   6 +-
 nexus/test-utils/src/lib.rs                   |  41 +-
 nexus/types/Cargo.toml                        |   1 +
 nexus/types/src/inventory.rs                  |  50 +-
 openapi/sled-agent.json                       |  64 ++
 schema/crdb/22.0.0/up01.sql                   |   4 +
 schema/crdb/22.0.0/up02.sql                   |  19 +
 schema/crdb/22.0.0/up03.sql                   |  11 +
 schema/crdb/22.0.0/up04.sql                   |  13 +
 schema/crdb/22.0.0/up05.sql                   |  41 ++
 schema/crdb/22.0.0/up06.sql                   |  13 +
 schema/crdb/dbinit.sql                        | 157 ++++-
 sled-agent/src/bin/sled-agent-sim.rs          |  31 +-
 sled-agent/src/http_entrypoints.rs            |  19 +-
 sled-agent/src/params.rs                      |  29 +-
 sled-agent/src/sim/config.rs                  |  60 +-
 sled-agent/src/sim/http_entrypoints.rs        |  45 +-
 sled-agent/src/sim/mod.rs                     |   5 +-
 sled-agent/src/sim/server.rs                  | 103 +--
 sled-agent/src/sim/sled_agent.rs              |  78 ++-
 sled-agent/src/sled_agent.rs                  |  66 +-
 43 files changed, 3462 insertions(+), 167 deletions(-)
 create mode 100644 nexus/inventory/example-data/madrid-sled14.json
 create mode 100644 nexus/inventory/example-data/madrid-sled16.json
 create mode 100644 nexus/inventory/example-data/madrid-sled17.json
 create mode 100644 nexus/inventory/src/sled_agent_enumerator.rs
 create mode 100644 nexus/inventory/tests/output/collector_sled_agent_errors.txt
 create mode 100644 schema/crdb/22.0.0/up01.sql
 create mode 100644 schema/crdb/22.0.0/up02.sql
 create mode 100644 schema/crdb/22.0.0/up03.sql
 create mode 100644 schema/crdb/22.0.0/up04.sql
 create mode 100644 schema/crdb/22.0.0/up05.sql
 create mode 100644 schema/crdb/22.0.0/up06.sql

diff --git a/Cargo.lock b/Cargo.lock
index 15496909d2..3cc7d09d82 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4230,12 +4230,18 @@ dependencies = [
  "base64",
  "chrono",
  "expectorate",
+ "futures",
  "gateway-client",
  "gateway-messages",
  "gateway-test-utils",
  "nexus-types",
+ "omicron-common",
+ "omicron-sled-agent",
  "omicron-workspace-hack",
  "regex",
+ "reqwest",
+ "serde_json",
+ "sled-agent-client",
  "slog",
  "strum",
  "thiserror",
@@ -4321,6 +4327,7 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
+ "sled-agent-client",
  "steno",
  "strum",
  "uuid",
diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs
index 0bbd27cf3e..89f41d10a6 100644
--- a/clients/sled-agent-client/src/lib.rs
+++ b/clients/sled-agent-client/src/lib.rs
@@ -10,7 +10,7 @@ use uuid::Uuid;

 progenitor::generate_api!(
     spec = "../../openapi/sled-agent.json",
-    derives = [ schemars::JsonSchema ],
+    derives = [ schemars::JsonSchema, PartialEq ],
     inner_type = slog::Logger,
     pre_hook = (|log: &slog::Logger, request: &reqwest::Request| {
         slog::debug!(log, "client request";
@@ -25,7 +25,6 @@ progenitor::generate_api!(
     //TODO trade the manual transformations later in this file for the
     // replace directives below?
     replace = {
-        //Ipv4Network = ipnetwork::Ipv4Network,
         SwitchLocation = omicron_common::api::external::SwitchLocation,
         Ipv6Network = ipnetwork::Ipv6Network,
         IpNetwork = ipnetwork::IpNetwork,
@@ -34,6 +33,37 @@ progenitor::generate_api!(
     }
 );

+// We cannot easily configure progenitor to derive `Eq` on all the client-
+// generated types because some have floats and other types that can't impl
+// `Eq`. We impl it explicitly for a few types on which we need it.
+impl Eq for types::OmicronZonesConfig {}
+impl Eq for types::OmicronZoneConfig {}
+impl Eq for types::OmicronZoneType {}
+impl Eq for types::OmicronZoneDataset {}
+
+impl types::OmicronZoneType {
+    /// Human-readable label describing what kind of zone this is
+    ///
+    /// This is just use for testing and reporting.
+    pub fn label(&self) -> impl std::fmt::Display {
+        match self {
+            types::OmicronZoneType::BoundaryNtp { .. } => "boundary_ntp",
+            types::OmicronZoneType::Clickhouse { .. } => "clickhouse",
+            types::OmicronZoneType::ClickhouseKeeper { .. } => {
+                "clickhouse_keeper"
+            }
+            types::OmicronZoneType::CockroachDb { .. } => "cockroach_db",
+            types::OmicronZoneType::Crucible { .. } => "crucible",
+            types::OmicronZoneType::CruciblePantry { .. } => "crucible_pantry",
+            types::OmicronZoneType::ExternalDns { .. } => "external_dns",
+            types::OmicronZoneType::InternalDns { .. } => "internal_dns",
+            types::OmicronZoneType::InternalNtp { .. } => "internal_ntp",
+            types::OmicronZoneType::Nexus { .. } => "nexus",
+            types::OmicronZoneType::Oximeter { .. } => "oximeter",
} => "oximeter", + } + } +} + impl omicron_common::api::external::ClientError for types::Error { fn message(&self) -> String { self.message.clone() @@ -245,6 +275,12 @@ impl From<&omicron_common::api::external::Name> for types::Name { } } +impl From for omicron_common::api::external::Name { + fn from(s: types::Name) -> Self { + Self::try_from(s.as_str().to_owned()).unwrap() + } +} + impl From for types::Vni { fn from(v: omicron_common::api::external::Vni) -> Self { Self(u32::from(v)) @@ -264,6 +300,12 @@ impl From for types::MacAddr { } } +impl From for omicron_common::api::external::MacAddr { + fn from(s: types::MacAddr) -> Self { + s.parse().unwrap() + } +} + impl From for types::Ipv4Net { fn from(n: omicron_common::api::external::Ipv4Net) -> Self { Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e)) @@ -292,6 +334,12 @@ impl From for types::Ipv4Net { } } +impl From for ipnetwork::Ipv4Network { + fn from(n: types::Ipv4Net) -> Self { + n.parse().unwrap() + } +} + impl From for types::Ipv4Network { fn from(n: ipnetwork::Ipv4Network) -> Self { Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e)) @@ -304,6 +352,12 @@ impl From for types::Ipv6Net { } } +impl From for ipnetwork::Ipv6Network { + fn from(n: types::Ipv6Net) -> Self { + n.parse().unwrap() + } +} + impl From for types::IpNet { fn from(n: ipnetwork::IpNetwork) -> Self { use ipnetwork::IpNetwork; @@ -314,6 +368,15 @@ impl From for types::IpNet { } } +impl From for ipnetwork::IpNetwork { + fn from(n: types::IpNet) -> Self { + match n { + types::IpNet::V4(v4) => ipnetwork::IpNetwork::V4(v4.into()), + types::IpNet::V6(v6) => ipnetwork::IpNetwork::V6(v6.into()), + } + } +} + impl From for types::Ipv4Net { fn from(n: std::net::Ipv4Addr) -> Self { Self::try_from(format!("{n}/32")) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 446152137a..3b05c58df3 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -525,7 +525,9 @@ impl JsonSchema for RoleName { // to serialize the value. 
// // TODO: custom JsonSchema and Deserialize impls to enforce i64::MAX limit -#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] pub struct ByteCount(u64); #[allow(non_upper_case_globals)] diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 08a783d8c8..ad7ab35455 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2472,6 +2472,7 @@ async fn cmd_db_inventory_collections_show( inv_collection_print(&collection).await?; let nerrors = inv_collection_print_errors(&collection).await?; inv_collection_print_devices(&collection, &long_string_formatter).await?; + inv_collection_print_sleds(&collection); if nerrors > 0 { eprintln!( @@ -2703,6 +2704,58 @@ async fn inv_collection_print_devices( Ok(()) } +fn inv_collection_print_sleds(collection: &Collection) { + println!("SLED AGENTS"); + for sled in collection.sled_agents.values() { + println!( + "\nsled {} (role = {:?}, serial {})", + sled.sled_id, + sled.sled_role, + match &sled.baseboard_id { + Some(baseboard_id) => &baseboard_id.serial_number, + None => "unknown", + }, + ); + println!( + " found at: {} from {}", + sled.time_collected, sled.source + ); + println!(" address: {}", sled.sled_agent_address); + println!(" usable hw threads: {}", sled.usable_hardware_threads); + println!( + " usable memory (GiB): {}", + sled.usable_physical_ram.to_whole_gibibytes() + ); + println!( + " reservoir (GiB): {}", + sled.reservoir_size.to_whole_gibibytes() + ); + + if let Some(zones) = collection.omicron_zones.get(&sled.sled_id) { + println!( + " zones collected from {} at {}", + zones.source, zones.time_collected, + ); + println!( + " zones generation: {} (count: {})", + *zones.zones.generation, + zones.zones.zones.len() + ); + + if zones.zones.zones.is_empty() { + continue; + } + + println!(" ZONES FOUND"); + for z in &zones.zones.zones { + println!(" zone {} (type {})", z.id, z.zone_type.label()); + } + } else { + println!(" warning: no zone information found"); + } + } +} + #[derive(Debug)] struct LongStringFormatter { show_long_strings: bool, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 72671fde98..47e2033718 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -6,10 +6,16 @@ use crate::schema::{ hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error, - inv_root_of_trust, inv_root_of_trust_page, inv_service_processor, - sw_caboose, sw_root_of_trust_page, + inv_omicron_zone, inv_omicron_zone_nic, inv_root_of_trust, + inv_root_of_trust_page, inv_service_processor, inv_sled_agent, + inv_sled_omicron_zones, sw_caboose, sw_root_of_trust_page, }; -use crate::{impl_enum_type, SqlU16, SqlU32}; +use crate::{ + impl_enum_type, ipv6, ByteCount, Generation, MacAddr, Name, SqlU16, SqlU32, + SqlU8, +}; +use anyhow::{anyhow, ensure}; +use anyhow::{bail, Context}; use chrono::DateTime; use chrono::Utc; use diesel::backend::Backend; @@ -18,9 +24,12 @@ use diesel::expression::AsExpression; use diesel::pg::Pg; use diesel::serialize::ToSql; use diesel::{serialize, sql_types}; +use ipnetwork::IpNetwork; use nexus_types::inventory::{ - BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, + BaseboardId, Caboose, Collection, OmicronZoneType, PowerState, RotPage, + RotSlot, }; +use std::net::SocketAddrV6; use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. 
@@ -538,3 +547,619 @@ pub struct InvRotPage { pub which: RotPageWhich, pub sw_root_of_trust_page_id: Uuid, } + +// See [`nexus_types::inventory::SledRole`]. +impl_enum_type!( + #[derive(SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "sled_role"))] + pub struct SledRoleEnum; + + #[derive( + Copy, + Clone, + Debug, + AsExpression, + FromSqlRow, + PartialOrd, + Ord, + PartialEq, + Eq + )] + #[diesel(sql_type = SledRoleEnum)] + pub enum SledRole; + + // Enum values + Gimlet => b"gimlet" + Scrimlet => b"scrimlet" +); + +impl From for SledRole { + fn from(value: nexus_types::inventory::SledRole) -> Self { + match value { + nexus_types::inventory::SledRole::Gimlet => SledRole::Gimlet, + nexus_types::inventory::SledRole::Scrimlet => SledRole::Scrimlet, + } + } +} + +impl From for nexus_types::inventory::SledRole { + fn from(value: SledRole) -> Self { + match value { + SledRole::Gimlet => nexus_types::inventory::SledRole::Gimlet, + SledRole::Scrimlet => nexus_types::inventory::SledRole::Scrimlet, + } + } +} + +/// See [`nexus_types::inventory::SledAgent`]. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_sled_agent)] +pub struct InvSledAgent { + pub inv_collection_id: Uuid, + pub time_collected: DateTime, + pub source: String, + pub sled_id: Uuid, + pub hw_baseboard_id: Option, + pub sled_agent_ip: ipv6::Ipv6Addr, + pub sled_agent_port: SqlU16, + pub sled_role: SledRole, + pub usable_hardware_threads: SqlU32, + pub usable_physical_ram: ByteCount, + pub reservoir_size: ByteCount, +} + +impl InvSledAgent { + pub fn new_without_baseboard( + collection_id: Uuid, + sled_agent: &nexus_types::inventory::SledAgent, + ) -> Result { + // It's irritating to have to check this case at runtime. The challenge + // is that if this sled agent does have a baseboard id, we don't know + // what its (SQL) id is. The only way to get it is to query it from + // the database. As a result, the caller takes a wholly different code + // path for that case that doesn't even involve constructing one of + // these objects. (In fact, we never see the id in Rust.) + // + // To check this at compile time, we'd have to bifurcate + // `nexus_types::inventory::SledAgent` into an enum with two variants: + // one with a baseboard id and one without. This would muck up all the + // other consumers of this type, just for a highly database-specific + // concern. + if sled_agent.baseboard_id.is_some() { + Err(anyhow!( + "attempted to directly insert InvSledAgent with \ + non-null baseboard id" + )) + } else { + Ok(InvSledAgent { + inv_collection_id: collection_id, + time_collected: sled_agent.time_collected, + source: sled_agent.source.clone(), + sled_id: sled_agent.sled_id, + hw_baseboard_id: None, + sled_agent_ip: ipv6::Ipv6Addr::from( + *sled_agent.sled_agent_address.ip(), + ), + sled_agent_port: SqlU16(sled_agent.sled_agent_address.port()), + sled_role: SledRole::from(sled_agent.sled_role), + usable_hardware_threads: SqlU32( + sled_agent.usable_hardware_threads, + ), + usable_physical_ram: ByteCount::from( + sled_agent.usable_physical_ram, + ), + reservoir_size: ByteCount::from(sled_agent.reservoir_size), + }) + } + } +} + +/// See [`nexus_types::inventory::OmicronZonesFound`]. 
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_sled_omicron_zones)] +pub struct InvSledOmicronZones { + pub inv_collection_id: Uuid, + pub time_collected: DateTime, + pub source: String, + pub sled_id: Uuid, + pub generation: Generation, +} + +impl InvSledOmicronZones { + pub fn new( + inv_collection_id: Uuid, + zones_found: &nexus_types::inventory::OmicronZonesFound, + ) -> InvSledOmicronZones { + InvSledOmicronZones { + inv_collection_id, + time_collected: zones_found.time_collected, + source: zones_found.source.clone(), + sled_id: zones_found.sled_id, + generation: Generation(zones_found.zones.generation.clone().into()), + } + } + + pub fn into_uninit_zones_found( + self, + ) -> nexus_types::inventory::OmicronZonesFound { + nexus_types::inventory::OmicronZonesFound { + time_collected: self.time_collected, + source: self.source, + sled_id: self.sled_id, + zones: nexus_types::inventory::OmicronZonesConfig { + generation: self.generation.0.into(), + zones: Vec::new(), + }, + } + } +} + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "zone_type"))] + pub struct ZoneTypeEnum; + + #[derive(Clone, Copy, Debug, Eq, AsExpression, FromSqlRow, PartialEq)] + #[diesel(sql_type = ZoneTypeEnum)] + pub enum ZoneType; + + // Enum values + BoundaryNtp => b"boundary_ntp" + Clickhouse => b"clickhouse" + ClickhouseKeeper => b"clickhouse_keeper" + CockroachDb => b"cockroach_db" + Crucible => b"crucible" + CruciblePantry => b"crucible_pantry" + ExternalDns => b"external_dns" + InternalDns => b"internal_dns" + InternalNtp => b"internal_ntp" + Nexus => b"nexus" + Oximeter => b"oximeter" +); + +/// See [`nexus_types::inventory::OmicronZoneConfig`]. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_omicron_zone)] +pub struct InvOmicronZone { + pub inv_collection_id: Uuid, + pub sled_id: Uuid, + pub id: Uuid, + pub underlay_address: ipv6::Ipv6Addr, + pub zone_type: ZoneType, + pub primary_service_ip: ipv6::Ipv6Addr, + pub primary_service_port: SqlU16, + pub second_service_ip: Option, + pub second_service_port: Option, + pub dataset_zpool_name: Option, + pub nic_id: Option, + pub dns_gz_address: Option, + pub dns_gz_address_index: Option, + pub ntp_ntp_servers: Option>, + pub ntp_dns_servers: Option>, + pub ntp_domain: Option, + pub nexus_external_tls: Option, + pub nexus_external_dns_servers: Option>, + pub snat_ip: Option, + pub snat_first_port: Option, + pub snat_last_port: Option, +} + +impl InvOmicronZone { + pub fn new( + inv_collection_id: Uuid, + sled_id: Uuid, + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> Result { + let id = zone.id; + let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); + let mut nic_id = None; + let mut dns_gz_address = None; + let mut dns_gz_address_index = None; + let mut ntp_ntp_servers = None; + let mut ntp_dns_servers = None; + let mut ntp_ntp_domain = None; + let mut nexus_external_tls = None; + let mut nexus_external_dns_servers = None; + let mut snat_ip = None; + let mut snat_first_port = None; + let mut snat_last_port = None; + let mut second_service_ip = None; + let mut second_service_port = None; + + let (zone_type, primary_service_sockaddr_str, dataset) = match &zone + .zone_type + { + OmicronZoneType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + } => { + ntp_ntp_servers = Some(ntp_servers.clone()); + ntp_dns_servers = Some(dns_servers.clone()); + ntp_ntp_domain = domain.clone(); + 
+
+impl_enum_type!(
+    #[derive(Clone, SqlType, Debug, QueryId)]
+    #[diesel(postgres_type(name = "zone_type"))]
+    pub struct ZoneTypeEnum;
+
+    #[derive(Clone, Copy, Debug, Eq, AsExpression, FromSqlRow, PartialEq)]
+    #[diesel(sql_type = ZoneTypeEnum)]
+    pub enum ZoneType;
+
+    // Enum values
+    BoundaryNtp => b"boundary_ntp"
+    Clickhouse => b"clickhouse"
+    ClickhouseKeeper => b"clickhouse_keeper"
+    CockroachDb => b"cockroach_db"
+    Crucible => b"crucible"
+    CruciblePantry => b"crucible_pantry"
+    ExternalDns => b"external_dns"
+    InternalDns => b"internal_dns"
+    InternalNtp => b"internal_ntp"
+    Nexus => b"nexus"
+    Oximeter => b"oximeter"
+);
+
+/// See [`nexus_types::inventory::OmicronZoneConfig`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = inv_omicron_zone)]
+pub struct InvOmicronZone {
+    pub inv_collection_id: Uuid,
+    pub sled_id: Uuid,
+    pub id: Uuid,
+    pub underlay_address: ipv6::Ipv6Addr,
+    pub zone_type: ZoneType,
+    pub primary_service_ip: ipv6::Ipv6Addr,
+    pub primary_service_port: SqlU16,
+    pub second_service_ip: Option<IpNetwork>,
+    pub second_service_port: Option<SqlU16>,
+    pub dataset_zpool_name: Option<String>,
+    pub nic_id: Option<Uuid>,
+    pub dns_gz_address: Option<ipv6::Ipv6Addr>,
+    pub dns_gz_address_index: Option<SqlU32>,
+    pub ntp_ntp_servers: Option<Vec<String>>,
+    pub ntp_dns_servers: Option<Vec<IpNetwork>>,
+    pub ntp_domain: Option<String>,
+    pub nexus_external_tls: Option<bool>,
+    pub nexus_external_dns_servers: Option<Vec<IpNetwork>>,
+    pub snat_ip: Option<IpNetwork>,
+    pub snat_first_port: Option<SqlU16>,
+    pub snat_last_port: Option<SqlU16>,
+}
+
+impl InvOmicronZone {
+    pub fn new(
+        inv_collection_id: Uuid,
+        sled_id: Uuid,
+        zone: &nexus_types::inventory::OmicronZoneConfig,
+    ) -> Result<InvOmicronZone, anyhow::Error> {
+        let id = zone.id;
+        let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address);
+        let mut nic_id = None;
+        let mut dns_gz_address = None;
+        let mut dns_gz_address_index = None;
+        let mut ntp_ntp_servers = None;
+        let mut ntp_dns_servers = None;
+        let mut ntp_ntp_domain = None;
+        let mut nexus_external_tls = None;
+        let mut nexus_external_dns_servers = None;
+        let mut snat_ip = None;
+        let mut snat_first_port = None;
+        let mut snat_last_port = None;
+        let mut second_service_ip = None;
+        let mut second_service_port = None;
+
+        let (zone_type, primary_service_sockaddr_str, dataset) = match &zone
+            .zone_type
+        {
+            OmicronZoneType::BoundaryNtp {
+                address,
+                ntp_servers,
+                dns_servers,
+                domain,
+                nic,
+                snat_cfg,
+            } => {
+                ntp_ntp_servers = Some(ntp_servers.clone());
+                ntp_dns_servers = Some(dns_servers.clone());
+                ntp_ntp_domain = domain.clone();
+                snat_ip = Some(IpNetwork::from(snat_cfg.ip));
+                snat_first_port = Some(SqlU16::from(snat_cfg.first_port));
+                snat_last_port = Some(SqlU16::from(snat_cfg.last_port));
+                nic_id = Some(nic.id);
+                (ZoneType::BoundaryNtp, address, None)
+            }
+            OmicronZoneType::Clickhouse { address, dataset } => {
+                (ZoneType::Clickhouse, address, Some(dataset))
+            }
+            OmicronZoneType::ClickhouseKeeper { address, dataset } => {
+                (ZoneType::ClickhouseKeeper, address, Some(dataset))
+            }
+            OmicronZoneType::CockroachDb { address, dataset } => {
+                (ZoneType::CockroachDb, address, Some(dataset))
+            }
+            OmicronZoneType::Crucible { address, dataset } => {
+                (ZoneType::Crucible, address, Some(dataset))
+            }
+            OmicronZoneType::CruciblePantry { address } => {
+                (ZoneType::CruciblePantry, address, None)
+            }
+            OmicronZoneType::ExternalDns {
+                dataset,
+                http_address,
+                dns_address,
+                nic,
+            } => {
+                nic_id = Some(nic.id);
+                let sockaddr = dns_address
+                    .parse::<std::net::SocketAddr>()
+                    .with_context(|| {
+                        format!(
+                            "parsing address for external DNS server {:?}",
+                            dns_address
+                        )
+                    })?;
+                second_service_ip = Some(sockaddr.ip());
+                second_service_port = Some(SqlU16::from(sockaddr.port()));
+                (ZoneType::ExternalDns, http_address, Some(dataset))
+            }
+            OmicronZoneType::InternalDns {
+                dataset,
+                http_address,
+                dns_address,
+                gz_address,
+                gz_address_index,
+            } => {
+                dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address));
+                dns_gz_address_index = Some(SqlU32::from(*gz_address_index));
+                let sockaddr = dns_address
+                    .parse::<std::net::SocketAddr>()
+                    .with_context(|| {
+                        format!(
+                            "parsing address for internal DNS server {:?}",
+                            dns_address
+                        )
+                    })?;
+                second_service_ip = Some(sockaddr.ip());
+                second_service_port = Some(SqlU16::from(sockaddr.port()));
+                (ZoneType::InternalDns, http_address, Some(dataset))
+            }
+            OmicronZoneType::InternalNtp {
+                address,
+                ntp_servers,
+                dns_servers,
+                domain,
+            } => {
+                ntp_ntp_servers = Some(ntp_servers.clone());
+                ntp_dns_servers = Some(dns_servers.clone());
+                ntp_ntp_domain = domain.clone();
+                (ZoneType::InternalNtp, address, None)
+            }
+            OmicronZoneType::Nexus {
+                internal_address,
+                external_ip,
+                nic,
+                external_tls,
+                external_dns_servers,
+            } => {
+                nic_id = Some(nic.id);
+                nexus_external_tls = Some(*external_tls);
+                nexus_external_dns_servers = Some(external_dns_servers.clone());
+                second_service_ip = Some(*external_ip);
+                (ZoneType::Nexus, internal_address, None)
+            }
+            OmicronZoneType::Oximeter { address } => {
+                (ZoneType::Oximeter, address, None)
+            }
+        };
+
+        let dataset_zpool_name =
+            dataset.map(|d| d.pool_name.as_str().to_string());
+        let primary_service_sockaddr = primary_service_sockaddr_str
+            .parse::<SocketAddrV6>()
+            .with_context(|| {
+                format!(
+                    "parsing socket address for primary IP {:?}",
+                    primary_service_sockaddr_str
+                )
+            })?;
+        let (primary_service_ip, primary_service_port) = (
+            ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()),
+            SqlU16::from(primary_service_sockaddr.port()),
+        );
+
+        Ok(InvOmicronZone {
+            inv_collection_id,
+            sled_id,
+            id,
+            underlay_address,
+            zone_type,
+            primary_service_ip,
+            primary_service_port,
+            second_service_ip: second_service_ip.map(IpNetwork::from),
+            second_service_port,
+            dataset_zpool_name,
+            nic_id,
+            dns_gz_address,
+            dns_gz_address_index,
+            ntp_ntp_servers,
+            ntp_dns_servers: ntp_dns_servers
+                .map(|list| list.into_iter().map(IpNetwork::from).collect()),
+            ntp_domain: ntp_ntp_domain,
+            nexus_external_tls,
+            nexus_external_dns_servers: nexus_external_dns_servers
+                .map(|list| list.into_iter().map(IpNetwork::from).collect()),
+            snat_ip,
+            snat_first_port,
+            snat_last_port,
+        })
+    }
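[Editorial sketch.] `InvOmicronZone::new` above flattens a tagged enum into one wide row: every variant-specific field becomes a nullable column that stays `None` for the other variants. A minimal, self-contained sketch of the same technique, with illustrative names:

```rust
enum Zone {
    Ntp { servers: Vec<String> },
    Dns { gz_address: std::net::Ipv6Addr },
}

struct FlatZoneRow {
    kind: &'static str,
    ntp_servers: Option<Vec<String>>,
    dns_gz_address: Option<std::net::Ipv6Addr>,
}

fn flatten(zone: &Zone) -> FlatZoneRow {
    // Exactly one group of optional columns is populated per variant.
    match zone {
        Zone::Ntp { servers } => FlatZoneRow {
            kind: "ntp",
            ntp_servers: Some(servers.clone()),
            dns_gz_address: None,
        },
        Zone::Dns { gz_address } => FlatZoneRow {
            kind: "dns",
            ntp_servers: None,
            dns_gz_address: Some(*gz_address),
        },
    }
}

fn main() {
    let row = flatten(&Zone::Ntp { servers: vec!["ntp.example".into()] });
    assert_eq!(row.kind, "ntp");
    assert!(row.dns_gz_address.is_none());
}
```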
+
+    pub fn into_omicron_zone_config(
+        self,
+        nic_row: Option<InvOmicronZoneNic>,
+    ) -> Result<nexus_types::inventory::OmicronZoneConfig, anyhow::Error> {
+        let address = SocketAddrV6::new(
+            std::net::Ipv6Addr::from(self.primary_service_ip),
+            *self.primary_service_port,
+            0,
+            0,
+        )
+        .to_string();
+
+        // Assemble a value that we can use to extract the NIC _if necessary_
+        // and report an error if it was needed but not found.
+        //
+        // Any error here should be impossible. By the time we get here, the
+        // caller should have provided `nic_row` iff there's a corresponding
+        // `nic_id` in this row, and the ids should match up. And whoever
+        // created this row ought to have provided a nic_id iff this type of
+        // zone needs a NIC. This last issue is not under our control, though,
+        // so we definitely want to handle that as an operational error. The
+        // others could arguably be programmer errors (i.e., we could `assert`),
+        // but it seems excessive to crash here.
+        //
+        // Note that we immediately return for any of the caller errors here.
+        // For the other error, we will return only later, if some code path
+        // below tries to use `nic` when it's not present.
+        let nic = match (self.nic_id, nic_row) {
+            (Some(expected_id), Some(nic_row)) => {
+                ensure!(expected_id == nic_row.id, "caller provided wrong NIC");
+                Ok(nic_row.into_network_interface_for_zone(self.id))
+            }
+            (None, None) => Err(anyhow!(
+                "expected zone to have an associated NIC, but it doesn't"
+            )),
+            (Some(_), None) => bail!("caller provided no NIC"),
+            (None, Some(_)) => bail!("caller unexpectedly provided a NIC"),
+        };
+
+        // Similarly, assemble a value that we can use to extract the dataset,
+        // if necessary. We only return this error if code below tries to use
+        // this value.
+        let dataset = self
+            .dataset_zpool_name
+            .map(|zpool_name| -> Result<_, anyhow::Error> {
+                Ok(nexus_types::inventory::OmicronZoneDataset {
+                    pool_name: zpool_name.parse().map_err(|e| {
+                        anyhow!("parsing zpool name {:?}: {}", zpool_name, e)
+                    })?,
+                })
+            })
+            .transpose()?
+            .ok_or_else(|| anyhow!("expected dataset zpool name, found none"));
+
+        // Do the same for the DNS server address.
+        let dns_address =
+            match (self.second_service_ip, self.second_service_port) {
+                (Some(dns_ip), Some(dns_port)) => {
+                    Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port)
+                        .to_string())
+                }
+                _ => Err(anyhow!(
+                    "expected second service IP and port, \
+                    found one missing"
+                )),
+            };
+
+        // Do the same for NTP zone properties.
+        let ntp_dns_servers = self
+            .ntp_dns_servers
+            .ok_or_else(|| anyhow!("expected list of DNS servers, found null"))
+            .map(|list| {
+                list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect()
+            });
+        let ntp_ntp_servers =
+            self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers"));
+
+        let zone_type = match self.zone_type {
+            ZoneType::BoundaryNtp => {
+                let snat_cfg = match (
+                    self.snat_ip,
+                    self.snat_first_port,
+                    self.snat_last_port,
+                ) {
+                    (Some(ip), Some(first_port), Some(last_port)) => {
+                        nexus_types::inventory::SourceNatConfig {
+                            ip: ip.ip(),
+                            first_port: *first_port,
+                            last_port: *last_port,
+                        }
+                    }
+                    _ => bail!(
+                        "expected non-NULL snat properties, \
+                        found at least one NULL"
+                    ),
+                };
+                OmicronZoneType::BoundaryNtp {
+                    address,
+                    dns_servers: ntp_dns_servers?,
+                    domain: self.ntp_domain,
+                    nic: nic?,
+                    ntp_servers: ntp_ntp_servers?,
+                    snat_cfg,
+                }
+            }
+            ZoneType::Clickhouse => {
+                OmicronZoneType::Clickhouse { address, dataset: dataset? }
+            }
+            ZoneType::ClickhouseKeeper => {
+                OmicronZoneType::ClickhouseKeeper { address, dataset: dataset? }
+            }
+            ZoneType::CockroachDb => {
+                OmicronZoneType::CockroachDb { address, dataset: dataset? }
+            }
+            ZoneType::Crucible => {
+                OmicronZoneType::Crucible { address, dataset: dataset? }
+            }
+            ZoneType::CruciblePantry => {
+                OmicronZoneType::CruciblePantry { address }
+            }
+            ZoneType::ExternalDns => OmicronZoneType::ExternalDns {
+                dataset: dataset?,
+                dns_address: dns_address?,
+                http_address: address,
+                nic: nic?,
+            },
+            ZoneType::InternalDns => OmicronZoneType::InternalDns {
+                dataset: dataset?,
+                dns_address: dns_address?,
+                http_address: address,
+                gz_address: *self.dns_gz_address.ok_or_else(|| {
+                    anyhow!("expected dns_gz_address, found none")
+                })?,
+                gz_address_index: *self.dns_gz_address_index.ok_or_else(
+                    || anyhow!("expected dns_gz_address_index, found none"),
+                )?,
+            },
+            ZoneType::InternalNtp => OmicronZoneType::InternalNtp {
+                address,
+                dns_servers: ntp_dns_servers?,
+                domain: self.ntp_domain,
+                ntp_servers: ntp_ntp_servers?,
+            },
+            ZoneType::Nexus => OmicronZoneType::Nexus {
+                internal_address: address,
+                nic: nic?,
+                external_tls: self
+                    .nexus_external_tls
+                    .ok_or_else(|| anyhow!("expected 'external_tls'"))?,
+                external_ip: self
+                    .second_service_ip
+                    .ok_or_else(|| anyhow!("expected second service IP"))?
+                    .ip(),
+                external_dns_servers: self
+                    .nexus_external_dns_servers
+                    .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))?
+                    .into_iter()
+                    .map(|i| i.ip())
+                    .collect(),
+            },
+            ZoneType::Oximeter => OmicronZoneType::Oximeter { address },
+        };
+        Ok(nexus_types::inventory::OmicronZoneConfig {
+            id: self.id,
+            underlay_address: std::net::Ipv6Addr::from(self.underlay_address),
+            zone_type,
+        })
+    }
+}
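[Editorial sketch.] The deferred-error pattern in `into_omicron_zone_config` is worth calling out: `Result` values for `nic`, `dataset`, and the DNS/NTP fields are built eagerly but only propagated with `?` inside the match arms that need them, so a missing column is an error only for zone types that require it. A self-contained sketch, with hypothetical names:

```rust
use anyhow::anyhow;

fn describe(kind: &str, maybe_port: Option<u16>) -> Result<String, anyhow::Error> {
    // Build the error lazily; it is only returned if an arm applies `?`.
    let port = maybe_port.ok_or_else(|| anyhow!("expected port, found none"));
    Ok(match kind {
        // This arm needs the port, so a missing port is an error here...
        "with_port" => format!("service on port {}", port?),
        // ...but this arm never looks at it, so `None` is fine.
        _ => "service without port".to_string(),
    })
}

fn main() {
    assert!(describe("with_port", Some(8080)).is_ok());
    assert!(describe("with_port", None).is_err());
    assert!(describe("other", None).is_ok());
}
```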
+
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = inv_omicron_zone_nic)]
+pub struct InvOmicronZoneNic {
+    inv_collection_id: Uuid,
+    pub id: Uuid,
+    name: Name,
+    ip: IpNetwork,
+    mac: MacAddr,
+    subnet: IpNetwork,
+    vni: SqlU32,
+    is_primary: bool,
+    slot: SqlU8,
+}
+
+impl InvOmicronZoneNic {
+    pub fn new(
+        inv_collection_id: Uuid,
+        zone: &nexus_types::inventory::OmicronZoneConfig,
+    ) -> Result<Option<InvOmicronZoneNic>, anyhow::Error> {
+        match &zone.zone_type {
+            OmicronZoneType::ExternalDns { nic, .. }
+            | OmicronZoneType::BoundaryNtp { nic, .. }
+            | OmicronZoneType::Nexus { nic, .. } => {
+                // We do not bother storing the NIC's kind and associated id
+                // because it should be inferrable from the other information
+                // that we have. Verify that here.
+                ensure!(
+                    matches!(
+                        nic.kind,
+                        nexus_types::inventory::NetworkInterfaceKind::Service(
+                            id
+                        ) if id == zone.id
+                    ),
+                    "expected zone's NIC kind to be \"service\" and the \
+                    id to match the zone's id ({})",
+                    zone.id
+                );
+
+                Ok(Some(InvOmicronZoneNic {
+                    inv_collection_id,
+                    id: nic.id,
+                    name: Name::from(
+                        omicron_common::api::external::Name::from(
+                            nic.name.clone(),
+                        ),
+                    ),
+                    ip: IpNetwork::from(nic.ip),
+                    mac: MacAddr::from(
+                        omicron_common::api::external::MacAddr::from(
+                            nic.mac.clone(),
+                        ),
+                    ),
+                    subnet: IpNetwork::from(nic.subnet.clone()),
+                    vni: SqlU32::from(nic.vni.0),
+                    is_primary: nic.primary,
+                    slot: SqlU8::from(nic.slot),
+                }))
+            }
+            _ => Ok(None),
+        }
+    }
+
+    pub fn into_network_interface_for_zone(
+        self,
+        zone_id: Uuid,
+    ) -> nexus_types::inventory::NetworkInterface {
+        nexus_types::inventory::NetworkInterface {
+            id: self.id,
+            ip: self.ip.ip(),
+            kind: nexus_types::inventory::NetworkInterfaceKind::Service(
+                zone_id,
+            ),
+            mac: (*self.mac).into(),
+            name: (&(*self.name)).into(),
+            primary: self.is_primary,
+            slot: *self.slot,
+            vni: nexus_types::inventory::Vni::from(*self.vni),
+            subnet: self.subnet.into(),
+        }
+    }
+}
diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index 2c3433b2d3..6b89e5a270 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -35,7 +35,7 @@ mod instance_state;
 mod inventory;
 mod ip_pool;
 mod ipv4net;
-mod ipv6;
+pub mod ipv6;
 mod ipv6net;
 mod l4_port_range;
 mod macaddr;
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 7f4bf51487..791afa6de4 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion;
 ///
 /// This should be updated whenever the schema is changed. For more details,
 /// refer to: schema/crdb/README.adoc
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(21, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(22, 0, 0);
 
 table! {
     disk (id) {
@@ -1331,6 +1331,77 @@ table! {
     }
 }
 
+table! {
+    inv_sled_agent (inv_collection_id, sled_id) {
+        inv_collection_id -> Uuid,
+        time_collected -> Timestamptz,
+        source -> Text,
+        sled_id -> Uuid,
+
+        hw_baseboard_id -> Nullable<Uuid>,
+
+        sled_agent_ip -> Inet,
+        sled_agent_port -> Int4,
+        sled_role -> crate::SledRoleEnum,
+        usable_hardware_threads -> Int8,
+        usable_physical_ram -> Int8,
+        reservoir_size -> Int8,
+    }
+}
+
+table! {
+    inv_sled_omicron_zones (inv_collection_id, sled_id) {
+        inv_collection_id -> Uuid,
+        time_collected -> Timestamptz,
+        source -> Text,
+        sled_id -> Uuid,
+
+        generation -> Int8,
+    }
+}
+
+table! {
+    inv_omicron_zone (inv_collection_id, id) {
+        inv_collection_id -> Uuid,
+        sled_id -> Uuid,
+
+        id -> Uuid,
+        underlay_address -> Inet,
+        zone_type -> crate::ZoneTypeEnum,
+
+        primary_service_ip -> Inet,
+        primary_service_port -> Int4,
+        second_service_ip -> Nullable<Inet>,
+        second_service_port -> Nullable<Int4>,
+        dataset_zpool_name -> Nullable<Text>,
+        nic_id -> Nullable<Uuid>,
+        dns_gz_address -> Nullable<Inet>,
+        dns_gz_address_index -> Nullable<Int8>,
+        ntp_ntp_servers -> Nullable<Array<Text>>,
+        ntp_dns_servers -> Nullable<Array<Inet>>,
+        ntp_domain -> Nullable<Text>,
+        nexus_external_tls -> Nullable<Bool>,
+        nexus_external_dns_servers -> Nullable<Array<Inet>>,
+        snat_ip -> Nullable<Inet>,
+        snat_first_port -> Nullable<Int4>,
+        snat_last_port -> Nullable<Int4>,
+    }
+}
+
+table! {
+    inv_omicron_zone_nic (inv_collection_id, id) {
+        inv_collection_id -> Uuid,
+        id -> Uuid,
+        name -> Text,
+        ip -> Inet,
+        mac -> Int8,
+        subnet -> Inet,
+        vni -> Int8,
+        is_primary -> Bool,
+        slot -> Int2,
+    }
+}
+
 table! {
     bootstore_keys (key, generation) {
         key -> Text,
@@ -1366,6 +1437,7 @@ allow_tables_to_appear_in_same_query!(
     sw_root_of_trust_page,
     inv_root_of_trust_page
 );
+allow_tables_to_appear_in_same_query!(hw_baseboard_id, inv_sled_agent,);
 
 allow_tables_to_appear_in_same_query!(
     dataset,
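[Editorial sketch.] For readers less familiar with Diesel: each `Nullable<...>` column in the `table!` definitions above lines up with an `Option<...>` field on the corresponding Queryable/Insertable struct, which is how the wide-row flattening earlier in this patch maps onto the schema. A minimal illustration, assuming Diesel's `postgres` feature and with made-up names:

```rust
use diesel::prelude::*;

diesel::table! {
    example_zone (id) {
        id -> Int8,
        ntp_domain -> Nullable<Text>,
        snat_first_port -> Nullable<Int4>,
    }
}

// Columns that don't apply to a given zone kind are simply left as `None`.
#[derive(Queryable, Insertable)]
#[diesel(table_name = example_zone)]
struct ExampleZone {
    id: i64,
    ntp_domain: Option<String>,   // Nullable<Text>
    snat_first_port: Option<i32>, // Nullable<Int4>
}
```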
diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs
index 7d880b4ec0..b7ff058234 100644
--- a/nexus/db-queries/src/db/datastore/inventory.rs
+++ b/nexus/db-queries/src/db/datastore/inventory.rs
@@ -36,12 +36,20 @@ use nexus_db_model::HwRotSlotEnum;
 use nexus_db_model::InvCaboose;
 use nexus_db_model::InvCollection;
 use nexus_db_model::InvCollectionError;
+use nexus_db_model::InvOmicronZone;
+use nexus_db_model::InvOmicronZoneNic;
 use nexus_db_model::InvRootOfTrust;
 use nexus_db_model::InvRotPage;
 use nexus_db_model::InvServiceProcessor;
+use nexus_db_model::InvSledAgent;
+use nexus_db_model::InvSledOmicronZones;
 use nexus_db_model::RotPageWhichEnum;
+use nexus_db_model::SledRole;
+use nexus_db_model::SledRoleEnum;
 use nexus_db_model::SpType;
 use nexus_db_model::SpTypeEnum;
+use nexus_db_model::SqlU16;
+use nexus_db_model::SqlU32;
 use nexus_db_model::SwCaboose;
 use nexus_db_model::SwRotPage;
 use nexus_types::inventory::BaseboardId;
@@ -108,6 +116,55 @@ impl DataStore {
                     ))
                 })
                 .collect::<Result<Vec<_>, Error>>()?;
+        // Partition the sled agents into those with an associated baseboard id
+        // and those without one. We handle these pretty differently.
+        let (sled_agents_baseboards, sled_agents_no_baseboards): (
+            Vec<_>,
+            Vec<_>,
+        ) = collection
+            .sled_agents
+            .values()
+            .partition(|sled_agent| sled_agent.baseboard_id.is_some());
+        let sled_agents_no_baseboards = sled_agents_no_baseboards
+            .into_iter()
+            .map(|sled_agent| {
+                assert!(sled_agent.baseboard_id.is_none());
+                InvSledAgent::new_without_baseboard(collection_id, sled_agent)
+                    .map_err(|e| Error::internal_error(&e.to_string()))
+            })
+            .collect::<Result<Vec<_>, Error>>()?;
+
+        let sled_omicron_zones = collection
+            .omicron_zones
+            .values()
+            .map(|found| InvSledOmicronZones::new(collection_id, found))
+            .collect::<Vec<_>>();
+        let omicron_zones = collection
+            .omicron_zones
+            .values()
+            .flat_map(|found| {
+                found.zones.zones.iter().map(|found_zone| {
+                    InvOmicronZone::new(
+                        collection_id,
+                        found.sled_id,
+                        found_zone,
+                    )
+                    .map_err(|e| Error::internal_error(&e.to_string()))
+                })
+            })
+            .collect::<Result<Vec<_>, Error>>()?;
+        let omicron_zone_nics = collection
+            .omicron_zones
+            .values()
+            .flat_map(|found| {
+                found.zones.zones.iter().filter_map(|found_zone| {
+                    InvOmicronZoneNic::new(collection_id, found_zone)
+                        .with_context(|| format!("zone {:?}", found_zone.id))
+                        .map_err(|e| Error::internal_error(&format!("{:#}", e)))
+                        .transpose()
+                })
+            })
+            .collect::<Result<Vec<_>, _>>()?;
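[Editorial sketch.] The `partition` call above splits the sled agents into the two insert paths in a single pass. A self-contained miniature, with `Option` standing in for `baseboard_id`:

```rust
fn main() {
    let sled_agents = vec![Some("bb-1"), None, Some("bb-2")];
    // One pass; `true` results go left, `false` results go right.
    let (with_bb, without_bb): (Vec<_>, Vec<_>) =
        sled_agents.into_iter().partition(|bb| bb.is_some());
    assert_eq!(with_bb.len(), 2);
    assert_eq!(without_bb.len(), 1);
}
```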
 
         // This implementation inserts all records associated with the
         // collection in one transaction. This is primarily for simplicity. It
@@ -573,6 +630,137 @@ impl DataStore {
                 }
             }
 
+            // Insert rows for the sled agents that we found. In practice, we'd
+            // expect these to all have baseboards (if using Oxide hardware) or
+            // none have baseboards (if not).
+            {
+                use db::schema::hw_baseboard_id::dsl as baseboard_dsl;
+                use db::schema::inv_sled_agent::dsl as sa_dsl;
+
+                // For sleds with a real baseboard id, we have to use the
+                // `INSERT INTO ... SELECT` pattern that we used for other types
+                // of rows above to pull in the baseboard id's uuid.
+                for sled_agent in &sled_agents_baseboards {
+                    let baseboard_id = sled_agent.baseboard_id.as_ref().expect(
+                        "already selected only sled agents with baseboards",
+                    );
+                    let selection = db::schema::hw_baseboard_id::table
+                        .select((
+                            collection_id
+                                .into_sql::<diesel::sql_types::Uuid>(),
+                            sled_agent
+                                .time_collected
+                                .into_sql::<diesel::sql_types::Timestamptz>(),
+                            sled_agent
+                                .source
+                                .clone()
+                                .into_sql::<diesel::sql_types::Text>(),
+                            sled_agent
+                                .sled_id
+                                .into_sql::<diesel::sql_types::Uuid>(),
+                            baseboard_dsl::id.nullable(),
+                            nexus_db_model::ipv6::Ipv6Addr::from(
+                                sled_agent.sled_agent_address.ip(),
+                            )
+                            .into_sql::<diesel::sql_types::Inet>(),
+                            SqlU16(sled_agent.sled_agent_address.port())
+                                .into_sql::<diesel::sql_types::Int4>(),
+                            SledRole::from(sled_agent.sled_role)
+                                .into_sql::<SledRoleEnum>(),
+                            SqlU32(sled_agent.usable_hardware_threads)
+                                .into_sql::<diesel::sql_types::Int8>(),
+                            nexus_db_model::ByteCount::from(
+                                sled_agent.usable_physical_ram,
+                            )
+                            .into_sql::<diesel::sql_types::Int8>(),
+                            nexus_db_model::ByteCount::from(
+                                sled_agent.reservoir_size,
+                            )
+                            .into_sql::<diesel::sql_types::Int8>(),
+                        ))
+                        .filter(
+                            baseboard_dsl::part_number
+                                .eq(baseboard_id.part_number.clone()),
+                        )
+                        .filter(
+                            baseboard_dsl::serial_number
+                                .eq(baseboard_id.serial_number.clone()),
+                        );
+
+                    let _ =
+                        diesel::insert_into(db::schema::inv_sled_agent::table)
+                            .values(selection)
+                            .into_columns((
+                                sa_dsl::inv_collection_id,
+                                sa_dsl::time_collected,
+                                sa_dsl::source,
+                                sa_dsl::sled_id,
+                                sa_dsl::hw_baseboard_id,
+                                sa_dsl::sled_agent_ip,
+                                sa_dsl::sled_agent_port,
+                                sa_dsl::sled_role,
+                                sa_dsl::usable_hardware_threads,
+                                sa_dsl::usable_physical_ram,
+                                sa_dsl::reservoir_size,
+                            ))
+                            .execute_async(&conn)
+                            .await?;
+
+                    // See the comment in the earlier block (where we use
+                    // `inv_service_processor::all_columns()`). The same
+                    // applies here.
+                    let (
+                        _inv_collection_id,
+                        _time_collected,
+                        _source,
+                        _sled_id,
+                        _hw_baseboard_id,
+                        _sled_agent_ip,
+                        _sled_agent_port,
+                        _sled_role,
+                        _usable_hardware_threads,
+                        _usable_physical_ram,
+                        _reservoir_size,
+                    ) = sa_dsl::inv_sled_agent::all_columns();
+                }
+
+                // For sleds with no baseboard information, we can't use
+                // the same INSERT INTO ... SELECT pattern because we
+                // won't find anything in the hw_baseboard_id table. It
+                // sucks that these are bifurcated code paths, but on
+                // the plus side, this is a much simpler INSERT, and we
+                // can insert all of them in one statement.
+                let _ = diesel::insert_into(db::schema::inv_sled_agent::table)
+                    .values(sled_agents_no_baseboards)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            // Insert all the Omicron zones that we found.
+            {
+                use db::schema::inv_sled_omicron_zones::dsl as sled_zones;
+                let _ = diesel::insert_into(sled_zones::inv_sled_omicron_zones)
+                    .values(sled_omicron_zones)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            {
+                use db::schema::inv_omicron_zone::dsl as omicron_zone;
+                let _ = diesel::insert_into(omicron_zone::inv_omicron_zone)
+                    .values(omicron_zones)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            {
+                use db::schema::inv_omicron_zone_nic::dsl as omicron_zone_nic;
+                let _ =
+                    diesel::insert_into(omicron_zone_nic::inv_omicron_zone_nic)
+                        .values(omicron_zone_nics)
+                        .execute_async(&conn)
+                        .await?;
+            }
+
             // Finally, insert the list of errors.
             {
                 use db::schema::inv_collection_error::dsl as errors_dsl;
@@ -825,7 +1013,18 @@
         // start removing it and we'd also need to make sure we didn't leak a
         // collection if we crash while deleting it.
let conn = self.pool_connection_authorized(opctx).await?; - let (ncollections, nsps, nrots, ncabooses, nrot_pages, nerrors) = conn + let ( + ncollections, + nsps, + nrots, + ncabooses, + nrot_pages, + nsled_agents, + nsled_agent_zones, + nzones, + nnics, + nerrors, + ) = conn .transaction_async(|conn| async move { // Remove the record describing the collection itself. let ncollections = { @@ -881,6 +1080,48 @@ impl DataStore { .await? }; + // Remove rows for sled agents found. + let nsled_agents = { + use db::schema::inv_sled_agent::dsl; + diesel::delete( + dsl::inv_sled_agent + .filter(dsl::inv_collection_id.eq(collection_id)), + ) + .execute_async(&conn) + .await? + }; + + // Remove rows associated with Omicron zones + let nsled_agent_zones = { + use db::schema::inv_sled_omicron_zones::dsl; + diesel::delete( + dsl::inv_sled_omicron_zones + .filter(dsl::inv_collection_id.eq(collection_id)), + ) + .execute_async(&conn) + .await? + }; + + let nzones = { + use db::schema::inv_omicron_zone::dsl; + diesel::delete( + dsl::inv_omicron_zone + .filter(dsl::inv_collection_id.eq(collection_id)), + ) + .execute_async(&conn) + .await? + }; + + let nnics = { + use db::schema::inv_omicron_zone_nic::dsl; + diesel::delete( + dsl::inv_omicron_zone_nic + .filter(dsl::inv_collection_id.eq(collection_id)), + ) + .execute_async(&conn) + .await? + }; + // Remove rows for errors encountered. let nerrors = { use db::schema::inv_collection_error::dsl; @@ -892,7 +1133,18 @@ impl DataStore { .await? }; - Ok((ncollections, nsps, nrots, ncabooses, nrot_pages, nerrors)) + Ok(( + ncollections, + nsps, + nrots, + ncabooses, + nrot_pages, + nsled_agents, + nsled_agent_zones, + nzones, + nnics, + nerrors, + )) }) .await .map_err(|error| match error { @@ -909,6 +1161,10 @@ impl DataStore { "nrots" => nrots, "ncabooses" => ncabooses, "nrot_pages" => nrot_pages, + "nsled_agents" => nsled_agents, + "nsled_agent_zones" => nsled_agent_zones, + "nzones" => nzones, + "nnics" => nnics, "nerrors" => nerrors, ); @@ -1085,9 +1341,27 @@ impl DataStore { }; limit_reached = limit_reached || rots.len() == usize_limit; - // Collect the unique baseboard ids referenced by SPs and RoTs. - let baseboard_id_ids: BTreeSet<_> = - sps.keys().chain(rots.keys()).cloned().collect(); + let sled_agent_rows: Vec<_> = { + use db::schema::inv_sled_agent::dsl; + dsl::inv_sled_agent + .filter(dsl::inv_collection_id.eq(id)) + .limit(sql_limit) + .select(InvSledAgent::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })? + }; + + // Collect the unique baseboard ids referenced by SPs, RoTs, and Sled + // Agents. + let baseboard_id_ids: BTreeSet<_> = sps + .keys() + .chain(rots.keys()) + .cloned() + .chain(sled_agent_rows.iter().filter_map(|s| s.hw_baseboard_id)) + .collect(); // Fetch the corresponding baseboard records. 
         let baseboards_by_id: BTreeMap<_, _> = {
             use db::schema::hw_baseboard_id::dsl;
@@ -1136,6 +1410,49 @@ impl DataStore {
                     })
                 })
                 .collect::<Result<BTreeMap<_, _>, _>>()?;
+        let sled_agents: BTreeMap<_, _> =
+            sled_agent_rows
+                .into_iter()
+                .map(|s: InvSledAgent| {
+                    let sled_id = s.sled_id;
+                    let baseboard_id = s
+                        .hw_baseboard_id
+                        .map(|id| {
+                            baseboards_by_id.get(&id).cloned().ok_or_else(
+                                || {
+                                    Error::internal_error(
+                                        "missing baseboard that we should have fetched",
+                                    )
+                                },
+                            )
+                        })
+                        .transpose()?;
+                    let sled_agent = nexus_types::inventory::SledAgent {
+                        time_collected: s.time_collected,
+                        source: s.source,
+                        sled_id,
+                        baseboard_id,
+                        sled_agent_address: std::net::SocketAddrV6::new(
+                            std::net::Ipv6Addr::from(s.sled_agent_ip),
+                            u16::from(s.sled_agent_port),
+                            0,
+                            0,
+                        ),
+                        sled_role: nexus_types::inventory::SledRole::from(
+                            s.sled_role,
+                        ),
+                        usable_hardware_threads: u32::from(
+                            s.usable_hardware_threads,
+                        ),
+                        usable_physical_ram: s.usable_physical_ram.into(),
+                        reservoir_size: s.reservoir_size.into(),
+                    };
+                    Ok((sled_id, sled_agent))
+                })
+                .collect::<Result<
+                    BTreeMap<Uuid, nexus_types::inventory::SledAgent>,
+                    Error,
+                >>()?;
 
         // Fetch records of cabooses found.
         let inv_caboose_rows = {
@@ -1237,7 +1554,7 @@ impl DataStore {
             .iter()
             .map(|inv_rot_page| inv_rot_page.sw_root_of_trust_page_id)
             .collect();
-        // Fetch the corresponing records.
+        // Fetch the corresponding records.
         let rot_pages_by_id: BTreeMap<_, _> = {
             use db::schema::sw_root_of_trust_page::dsl;
             dsl::sw_root_of_trust_page
@@ -1299,6 +1616,117 @@ impl DataStore {
             );
         }
 
+        // Now read the Omicron zones.
+        //
+        // In the first pass, we'll load the "inv_sled_omicron_zones" records.
+        // There's one of these per sled. It does not contain the actual list
+        // of zones -- basically just collection metadata and the generation
+        // number. We'll assemble these directly into the data structure we're
+        // trying to build, which maps sled ids to objects describing the zones
+        // found on each sled.
+        let mut omicron_zones: BTreeMap<_, _> = {
+            use db::schema::inv_sled_omicron_zones::dsl;
+            dsl::inv_sled_omicron_zones
+                .filter(dsl::inv_collection_id.eq(id))
+                .limit(sql_limit)
+                .select(InvSledOmicronZones::as_select())
+                .load_async(&*conn)
+                .await
+                .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?
+                .into_iter()
+                .map(|sled_zones_config| {
+                    (
+                        sled_zones_config.sled_id,
+                        sled_zones_config.into_uninit_zones_found(),
+                    )
+                })
+                .collect()
+        };
+        limit_reached = limit_reached || omicron_zones.len() == usize_limit;
+
+        // Assemble a mutable map of all the NICs found, by NIC id. As we
+        // match these up with the corresponding zone below, we'll remove items
+        // from this set. That way we can tell if the same NIC was used twice
+        // or not used at all.
+        let mut omicron_zone_nics: BTreeMap<_, _> = {
+            use db::schema::inv_omicron_zone_nic::dsl;
+            dsl::inv_omicron_zone_nic
+                .filter(dsl::inv_collection_id.eq(id))
+                .limit(sql_limit)
+                .select(InvOmicronZoneNic::as_select())
+                .load_async(&*conn)
+                .await
+                .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?
+                .into_iter()
+                .map(|found_zone_nic| (found_zone_nic.id, found_zone_nic))
+                .collect()
+        };
+        limit_reached = limit_reached || omicron_zone_nics.len() == usize_limit;
+
+        // Now load the actual list of zones from all sleds.
+        let omicron_zones_list = {
+            use db::schema::inv_omicron_zone::dsl;
+            dsl::inv_omicron_zone
+                .filter(dsl::inv_collection_id.eq(id))
+                // It's not strictly necessary to order these by id. Doing so
+                // ensures a consistent representation for `Collection`, which
+                // makes testing easier.
It's already indexed to do this, too. + .order_by(dsl::id) + .limit(sql_limit) + .select(InvOmicronZone::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })? + }; + limit_reached = + limit_reached || omicron_zones_list.len() == usize_limit; + for z in omicron_zones_list { + let nic_row = z + .nic_id + .map(|id| { + // This error means that we found a row in inv_omicron_zone + // that references a NIC by id but there's no corresponding + // row in inv_omicron_zone_nic with that id. This should be + // impossible and reflects either a bug or database + // corruption. + omicron_zone_nics.remove(&id).ok_or_else(|| { + Error::internal_error(&format!( + "zone {:?}: expected to find NIC {:?}, but didn't", + z.id, z.nic_id + )) + }) + }) + .transpose()?; + let map = omicron_zones.get_mut(&z.sled_id).ok_or_else(|| { + // This error means that we found a row in inv_omicron_zone with + // no associated record in inv_sled_omicron_zones. This should + // be impossible and reflects either a bug or database + // corruption. + Error::internal_error(&format!( + "zone {:?}: unknown sled: {:?}", + z.id, z.sled_id + )) + })?; + let zone_id = z.id; + let zone = z + .into_omicron_zone_config(nic_row) + .with_context(|| { + format!("zone {:?}: parse from database", zone_id) + }) + .map_err(|e| { + Error::internal_error(&format!("{:#}", e.to_string())) + })?; + map.zones.zones.push(zone); + } + + bail_unless!( + omicron_zone_nics.is_empty(), + "found extra Omicron zone NICs: {:?}", + omicron_zone_nics.keys() + ); + Ok(( Collection { id, @@ -1313,6 +1741,8 @@ impl DataStore { rots, cabooses_found, rot_pages_found, + sled_agents, + omicron_zones, }, limit_reached, )) @@ -1476,7 +1906,7 @@ mod test { assert_eq!(collection1, collection_read); // There ought to be no baseboards, cabooses, or RoT pages in the - // databases from that collection. + // database from that collection. 
         assert_eq!(collection1.baseboards.len(), 0);
         assert_eq!(collection1.cabooses.len(), 0);
         assert_eq!(collection1.rot_pages.len(), 0);
@@ -1815,6 +2245,39 @@ mod test {
                 .await
                 .unwrap();
             assert_eq!(0, count);
+            let count =
+                schema::inv_root_of_trust_page::dsl::inv_root_of_trust_page
+                    .select(diesel::dsl::count_star())
+                    .first_async::<i64>(&conn)
+                    .await
+                    .unwrap();
+            assert_eq!(0, count);
+            let count = schema::inv_sled_agent::dsl::inv_sled_agent
+                .select(diesel::dsl::count_star())
+                .first_async::<i64>(&conn)
+                .await
+                .unwrap();
+            assert_eq!(0, count);
+            let count =
+                schema::inv_sled_omicron_zones::dsl::inv_sled_omicron_zones
+                    .select(diesel::dsl::count_star())
+                    .first_async::<i64>(&conn)
+                    .await
+                    .unwrap();
+            assert_eq!(0, count);
+            let count = schema::inv_omicron_zone::dsl::inv_omicron_zone
+                .select(diesel::dsl::count_star())
+                .first_async::<i64>(&conn)
+                .await
+                .unwrap();
+            assert_eq!(0, count);
+            let count = schema::inv_omicron_zone_nic::dsl::inv_omicron_zone_nic
+                .select(diesel::dsl::count_star())
+                .first_async::<i64>(&conn)
+                .await
+                .unwrap();
+            assert_eq!(0, count);
+
             Ok::<(), anyhow::Error>(())
         })
         .await
diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs
index e96a15894d..6fb951de84 100644
--- a/nexus/db-queries/src/db/pool_connection.rs
+++ b/nexus/db-queries/src/db/pool_connection.rs
@@ -58,6 +58,7 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[
     "service_kind",
     "sled_provision_state",
     "sled_resource_kind",
+    "sled_role",
     "snapshot_state",
     "sp_type",
     "switch_interface_kind",
@@ -73,6 +74,7 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[
     "vpc_firewall_rule_protocol",
     "vpc_firewall_rule_status",
     "vpc_router_kind",
+    "zone_type",
 ];
 const CUSTOM_TYPE_SCHEMA: &'static str = "public";
diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml
index 22b48ebcec..1c20e8f8b6 100644
--- a/nexus/inventory/Cargo.toml
+++ b/nexus/inventory/Cargo.toml
@@ -8,9 +8,14 @@ license = "MPL-2.0"
 anyhow.workspace = true
 base64.workspace = true
 chrono.workspace = true
+futures.workspace = true
 gateway-client.workspace = true
 gateway-messages.workspace = true
 nexus-types.workspace = true
+omicron-common.workspace = true
+reqwest.workspace = true
+serde_json.workspace = true
+sled-agent-client.workspace = true
 slog.workspace = true
 strum.workspace = true
 thiserror.workspace = true
@@ -20,5 +25,6 @@ omicron-workspace-hack.workspace = true
 [dev-dependencies]
 expectorate.workspace = true
 gateway-test-utils.workspace = true
+omicron-sled-agent.workspace = true
 regex.workspace = true
 tokio.workspace = true
diff --git a/nexus/inventory/example-data/madrid-sled14.json b/nexus/inventory/example-data/madrid-sled14.json
new file mode 100644
index 0000000000..f91c12d3f0
--- /dev/null
+++ b/nexus/inventory/example-data/madrid-sled14.json
@@ -0,0 +1,214 @@
+{
+  "generation": 5,
+  "zones": [
+    {
+      "id": "0a5f085b-dfb9-4eed-bd24-678bd97e453c",
+      "underlay_address": "fd00:1122:3344:104::c",
+      "zone_type": {
+        "type": "crucible",
+        "address": "[fd00:1122:3344:104::c]:32345",
+        "dataset": {
+          "pool_name": "oxp_e1bf20e5-603c-4d14-94c4-47dc1eb58c45"
+        }
+      }
+    },
+    {
+      "id": "175eb50f-c54c-41ed-b30e-bb710868b362",
+      "underlay_address": "fd00:1122:3344:104::a",
+      "zone_type": {
+        "type": "crucible",
+        "address": "[fd00:1122:3344:104::a]:32345",
+        "dataset": {
+          "pool_name": "oxp_3bcdbecd-827a-426e-96b6-30c355b78301"
+        }
+      }
+    },
+    {
+      "id": "844a964a-831c-4cb9-82b5-3883c9b404db",
+      "underlay_address": "fd00:1122:3344:104::e",
+      "zone_type": {
+        "type":
"crucible", + "address": "[fd00:1122:3344:104::e]:32345", + "dataset": { + "pool_name": "oxp_d721dbb5-6a10-4fe8-9d70-fae69ab84676" + } + } + }, + { + "id": "cd8a5031-44a3-4090-86d7-2bfcc3de7942", + "underlay_address": "fd00:1122:3344:104::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::d]:32345", + "dataset": { + "pool_name": "oxp_a90de3a7-b760-45b7-ad72-70cd3570a940" + } + } + }, + { + "id": "f7f78c86-f572-49bf-b6cd-24658ddee847", + "underlay_address": "fd00:1122:3344:104::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::7]:32345", + "dataset": { + "pool_name": "oxp_5dd3aedf-c3c5-4258-8864-3ea8b5ae321b" + } + } + }, + { + "id": "543e32e4-7d8c-4888-a085-1c530555ee22", + "underlay_address": "fd00:1122:3344:104::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::6]:32345", + "dataset": { + "pool_name": "oxp_46b4a891-addc-4690-b8ff-8625b9c5c3bc" + } + } + }, + { + "id": "28786d99-48d2-4491-a4ae-943e603f3dab", + "underlay_address": "fd00:1122:3344:104::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::9]:32345", + "dataset": { + "pool_name": "oxp_8c96d804-3a6c-4c1b-be24-1e7fc18824de" + } + } + }, + { + "id": "e59b6fc3-3b0e-4e17-acfa-0351e2924771", + "underlay_address": "fd00:1122:3344:104::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::b]:32345", + "dataset": { + "pool_name": "oxp_49154338-0e01-4394-9dd2-cb4c53cbb90f" + } + } + }, + { + "id": "ab67e1fa-337f-45e6-83f0-6e94a9d50fc0", + "underlay_address": "fd00:1122:3344:104::4", + "zone_type": { + "type": "nexus", + "internal_address": "[fd00:1122:3344:104::4]:12221", + "external_ip": "172.20.28.2", + "nic": { + "id": "5d4a7e78-d1e1-41cd-881c-02d808fb90be", + "kind": { + "type": "service", + "id": "ab67e1fa-337f-45e6-83f0-6e94a9d50fc0" + }, + "name": "nexus-ab67e1fa-337f-45e6-83f0-6e94a9d50fc0", + "ip": "172.30.2.5", + "mac": "A8:40:25:FF:B7:E2", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ] + } + }, + { + "id": "5a6d10a6-ce94-444b-82ce-be25ebe58b9a", + "underlay_address": "fd00:1122:3344:104::f", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::f]:32345", + "dataset": { + "pool_name": "oxp_13a6ef76-5904-4794-8083-dfeb6806e5f1" + } + } + }, + { + "id": "442c669b-14d4-48b5-8f05-741b3c67a558", + "underlay_address": "fd00:1122:3344:104::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:104::8]:32345", + "dataset": { + "pool_name": "oxp_0010be1f-4223-4f3e-844c-3e823488a852" + } + } + }, + { + "id": "5db69c8f-4565-4cae-8372-f20ada0f67e9", + "underlay_address": "fd00:1122:3344:104::5", + "zone_type": { + "type": "clickhouse", + "address": "[fd00:1122:3344:104::5]:8123", + "dataset": { + "pool_name": "oxp_46b4a891-addc-4690-b8ff-8625b9c5c3bc" + } + } + }, + { + "id": "5d840664-3eb1-45da-8876-d44e1cfb1142", + "underlay_address": "fd00:1122:3344:104::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:104::3]:32221", + "dataset": { + "pool_name": "oxp_46b4a891-addc-4690-b8ff-8625b9c5c3bc" + } + } + }, + { + "id": "d38984ac-a366-4936-b64f-d98ae3dc2035", + "underlay_address": "fd00:1122:3344:104::10", + "zone_type": { + "type": "boundary_ntp", + "address": "[fd00:1122:3344:104::10]:123", + "ntp_servers": [ + "ntp.eng.oxide.computer" + ], + "dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ], + "domain": null, + "nic": { + "id": 
"2e4943f4-0477-4b5b-afd7-70c1f4aaf928", + "kind": { + "type": "service", + "id": "d38984ac-a366-4936-b64f-d98ae3dc2035" + }, + "name": "ntp-d38984ac-a366-4936-b64f-d98ae3dc2035", + "ip": "172.30.3.6", + "mac": "A8:40:25:FF:C0:38", + "subnet": "172.30.3.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "snat_cfg": { + "ip": "172.20.28.6", + "first_port": 16384, + "last_port": 32767 + } + } + }, + { + "id": "23856e18-8736-49a6-b487-bc5bf850fee0", + "underlay_address": "fd00:1122:3344:2::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_46b4a891-addc-4690-b8ff-8625b9c5c3bc" + }, + "http_address": "[fd00:1122:3344:2::1]:5353", + "dns_address": "[fd00:1122:3344:2::1]:53", + "gz_address": "fd00:1122:3344:2::2", + "gz_address_index": 1 + } + } + ] +} diff --git a/nexus/inventory/example-data/madrid-sled16.json b/nexus/inventory/example-data/madrid-sled16.json new file mode 100644 index 0000000000..edf3c71571 --- /dev/null +++ b/nexus/inventory/example-data/madrid-sled16.json @@ -0,0 +1,206 @@ +{ + "generation": 5, + "zones": [ + { + "id": "b2629475-65b2-4e8a-9e70-d4e8c034d8ad", + "underlay_address": "fd00:1122:3344:102::e", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::e]:32345", + "dataset": { + "pool_name": "oxp_1cd1c449-b5e1-4e8b-bb2f-2e2bd5a8f301" + } + } + }, + { + "id": "1aa5fd71-d766-4f20-b3c7-9cf4fe9e4f2e", + "underlay_address": "fd00:1122:3344:102::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::9]:32345", + "dataset": { + "pool_name": "oxp_6d799846-deac-4809-93bd-5dad30127938" + } + } + }, + { + "id": "271ee61b-9e97-4e45-a407-0083f8bf15a7", + "underlay_address": "fd00:1122:3344:102::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::7]:32345", + "dataset": { + "pool_name": "oxp_65de425b-1487-4d46-85b5-f5fa7c9e776a" + } + } + }, + { + "id": "750b40ef-8e83-4c7a-be96-33964b2244f3", + "underlay_address": "fd00:1122:3344:102::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::a]:32345", + "dataset": { + "pool_name": "oxp_901a85dd-8214-407a-a358-ef4aebfa810d" + } + } + }, + { + "id": "0322760d-a1e2-4911-8745-569f6bad8251", + "underlay_address": "fd00:1122:3344:102::4", + "zone_type": { + "type": "external_dns", + "dataset": { + "pool_name": "oxp_65de425b-1487-4d46-85b5-f5fa7c9e776a" + }, + "http_address": "[fd00:1122:3344:102::4]:5353", + "dns_address": "172.20.28.1:53", + "nic": { + "id": "8b99b41f-976d-4cb5-bad6-492cde39575a", + "kind": { + "type": "service", + "id": "0322760d-a1e2-4911-8745-569f6bad8251" + }, + "name": "external-dns-0322760d-a1e2-4911-8745-569f6bad8251", + "ip": "172.30.1.5", + "mac": "A8:40:25:FF:F7:4A", + "subnet": "172.30.1.0/24", + "vni": 100, + "primary": true, + "slot": 0 + } + } + }, + { + "id": "f350b534-e9bb-4e47-a2ae-4029efe48e1a", + "underlay_address": "fd00:1122:3344:102::6", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:102::6]:17000" + } + }, + { + "id": "e9d7d6ba-59e3-44ff-9081-f43e61c9968a", + "underlay_address": "fd00:1122:3344:102::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::d]:32345", + "dataset": { + "pool_name": "oxp_51abdeb3-6673-4af3-aa91-7e8748e4dda2" + } + } + }, + { + "id": "d02206f1-7567-4753-9221-6b2b70407925", + "underlay_address": "fd00:1122:3344:102::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::b]:32345", + "dataset": { + "pool_name": "oxp_0fa59017-d1e7-47c1-9ed6-b66851e544ee" + } + } + }, + { + 
"id": "c489b9a3-33e5-487c-8a60-77853584dca1", + "underlay_address": "fd00:1122:3344:102::f", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::f]:32345", + "dataset": { + "pool_name": "oxp_17d7dbce-b430-4c71-a27e-a5e66d175347" + } + } + }, + { + "id": "996f3011-5aaa-4732-a47d-e6514b1131d8", + "underlay_address": "fd00:1122:3344:102::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::c]:32345", + "dataset": { + "pool_name": "oxp_87714aed-4573-438c-8c9d-3ed64688bdc4" + } + } + }, + { + "id": "cef138ff-87a4-4509-ba30-2395e01ac5f7", + "underlay_address": "fd00:1122:3344:102::5", + "zone_type": { + "type": "oximeter", + "address": "[fd00:1122:3344:102::5]:12223" + } + }, + { + "id": "263584b3-2f53-4f87-a9c0-60a4c78af6c4", + "underlay_address": "fd00:1122:3344:102::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:102::8]:32345", + "dataset": { + "pool_name": "oxp_2acbc210-8b83-490a-b7a7-e458d742c269" + } + } + }, + { + "id": "2f336547-e4b0-422c-af54-deae20b4580c", + "underlay_address": "fd00:1122:3344:102::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:102::3]:32221", + "dataset": { + "pool_name": "oxp_65de425b-1487-4d46-85b5-f5fa7c9e776a" + } + } + }, + { + "id": "412bfd7b-4bf8-471d-ae4d-90bf0bdd05ff", + "underlay_address": "fd00:1122:3344:102::10", + "zone_type": { + "type": "boundary_ntp", + "address": "[fd00:1122:3344:102::10]:123", + "ntp_servers": [ + "ntp.eng.oxide.computer" + ], + "dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ], + "domain": null, + "nic": { + "id": "6c2aa1c5-0e42-4b80-9b31-26d0e8599d0d", + "kind": { + "type": "service", + "id": "412bfd7b-4bf8-471d-ae4d-90bf0bdd05ff" + }, + "name": "ntp-412bfd7b-4bf8-471d-ae4d-90bf0bdd05ff", + "ip": "172.30.3.5", + "mac": "A8:40:25:FF:F2:45", + "subnet": "172.30.3.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "snat_cfg": { + "ip": "172.20.28.5", + "first_port": 0, + "last_port": 16383 + } + } + }, + { + "id": "7de28140-8cdc-4478-9204-63763ecc10ff", + "underlay_address": "fd00:1122:3344:1::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_65de425b-1487-4d46-85b5-f5fa7c9e776a" + }, + "http_address": "[fd00:1122:3344:1::1]:5353", + "dns_address": "[fd00:1122:3344:1::1]:53", + "gz_address": "fd00:1122:3344:1::2", + "gz_address_index": 0 + } + } + ] +} diff --git a/nexus/inventory/example-data/madrid-sled17.json b/nexus/inventory/example-data/madrid-sled17.json new file mode 100644 index 0000000000..8ac5dff840 --- /dev/null +++ b/nexus/inventory/example-data/madrid-sled17.json @@ -0,0 +1,172 @@ +{ + "generation": 5, + "zones": [ + { + "id": "e58917eb-98cc-4b85-b851-4b46833060dc", + "underlay_address": "fd00:1122:3344:103::8", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::8]:32345", + "dataset": { + "pool_name": "oxp_c6911096-09b3-4f64-bcd6-21701ca2d6ae" + } + } + }, + { + "id": "ae07bfa3-09a9-4b19-9721-c89f39e153fe", + "underlay_address": "fd00:1122:3344:103::7", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::7]:32345", + "dataset": { + "pool_name": "oxp_4c667609-0876-4c8c-ae60-1b30aaf236dc" + } + } + }, + { + "id": "6e305032-a926-4c2b-a89a-0165799b9810", + "underlay_address": "fd00:1122:3344:103::9", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::9]:32345", + "dataset": { + "pool_name": "oxp_d9974711-1064-441d-ba75-0ffabfc86d27" + } + } + }, + { + "id": "2deb45cb-7160-47fc-9180-ab14c1731427", + "underlay_address": 
"fd00:1122:3344:103::c", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::c]:32345", + "dataset": { + "pool_name": "oxp_e675c45a-5e1b-4d24-99af-806523ed17d5" + } + } + }, + { + "id": "804f9ff7-0d45-465f-8820-ee0fc7c25286", + "underlay_address": "fd00:1122:3344:103::4", + "zone_type": { + "type": "nexus", + "internal_address": "[fd00:1122:3344:103::4]:12221", + "external_ip": "172.20.28.3", + "nic": { + "id": "3e1324f0-cad4-484e-a101-e26da2706e92", + "kind": { + "type": "service", + "id": "804f9ff7-0d45-465f-8820-ee0fc7c25286" + }, + "name": "nexus-804f9ff7-0d45-465f-8820-ee0fc7c25286", + "ip": "172.30.2.6", + "mac": "A8:40:25:FF:A5:50", + "subnet": "172.30.2.0/24", + "vni": 100, + "primary": true, + "slot": 0 + }, + "external_tls": true, + "external_dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ] + } + }, + { + "id": "c28abc48-2fb2-487b-b89a-96317c4e2df2", + "underlay_address": "fd00:1122:3344:103::b", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::b]:32345", + "dataset": { + "pool_name": "oxp_93017061-5910-4bf5-a366-4f1b2871b5c3" + } + } + }, + { + "id": "4da2814a-7d31-4311-97cf-7648e7b64911", + "underlay_address": "fd00:1122:3344:103::d", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::d]:32345", + "dataset": { + "pool_name": "oxp_33a4881a-2b3f-4840-b252-f370024eee64" + } + } + }, + { + "id": "a3a6216e-fdc8-47b6-8ba8-eb666629f5c2", + "underlay_address": "fd00:1122:3344:103::a", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::a]:32345", + "dataset": { + "pool_name": "oxp_844fd687-fd26-4616-91aa-441cf136c62d" + } + } + }, + { + "id": "bd646149-3e59-4aac-b2a0-b79910b8d6a8", + "underlay_address": "fd00:1122:3344:103::6", + "zone_type": { + "type": "crucible", + "address": "[fd00:1122:3344:103::6]:32345", + "dataset": { + "pool_name": "oxp_d130514f-6532-4c02-ac9f-c4958052c669" + } + } + }, + { + "id": "8c9735b2-9097-4ba5-b783-dfea16c5e0ab", + "underlay_address": "fd00:1122:3344:103::5", + "zone_type": { + "type": "crucible_pantry", + "address": "[fd00:1122:3344:103::5]:17000" + } + }, + { + "id": "8fe2fb59-5a89-4d40-b47a-6d5fcfb66ddd", + "underlay_address": "fd00:1122:3344:103::3", + "zone_type": { + "type": "cockroach_db", + "address": "[fd00:1122:3344:103::3]:32221", + "dataset": { + "pool_name": "oxp_d130514f-6532-4c02-ac9f-c4958052c669" + } + } + }, + { + "id": "9ee036cf-88c5-4a0e-aae7-eb2849379aad", + "underlay_address": "fd00:1122:3344:103::e", + "zone_type": { + "type": "internal_ntp", + "address": "[fd00:1122:3344:103::e]:123", + "ntp_servers": [ + "412bfd7b-4bf8-471d-ae4d-90bf0bdd05ff.host.control-plane.oxide.internal", + "d38984ac-a366-4936-b64f-d98ae3dc2035.host.control-plane.oxide.internal" + ], + "dns_servers": [ + "fd00:1122:3344:1::1", + "fd00:1122:3344:2::1", + "fd00:1122:3344:3::1" + ], + "domain": null + } + }, + { + "id": "7b006fad-d693-441b-bdd0-84cb323530e9", + "underlay_address": "fd00:1122:3344:3::1", + "zone_type": { + "type": "internal_dns", + "dataset": { + "pool_name": "oxp_d130514f-6532-4c02-ac9f-c4958052c669" + }, + "http_address": "[fd00:1122:3344:3::1]:5353", + "dns_address": "[fd00:1122:3344:3::1]:53", + "gz_address": "fd00:1122:3344:3::2", + "gz_address_index": 2 + } + } + ] +} diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 2d8ba0d1f9..602655ef0b 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -19,11 +19,14 @@ use nexus_types::inventory::Caboose; use nexus_types::inventory::CabooseFound; 
 use nexus_types::inventory::CabooseWhich;
 use nexus_types::inventory::Collection;
+use nexus_types::inventory::OmicronZonesFound;
 use nexus_types::inventory::RotPage;
 use nexus_types::inventory::RotPageFound;
 use nexus_types::inventory::RotPageWhich;
 use nexus_types::inventory::RotState;
 use nexus_types::inventory::ServiceProcessor;
+use nexus_types::inventory::SledAgent;
+use omicron_common::api::external::ByteCount;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::sync::Arc;
@@ -81,6 +84,8 @@ pub struct CollectionBuilder {
         BTreeMap<CabooseWhich, BTreeMap<Arc<BaseboardId>, CabooseFound>>,
     rot_pages_found:
         BTreeMap<RotPageWhich, BTreeMap<Arc<BaseboardId>, RotPageFound>>,
+    sleds: BTreeMap<Uuid, SledAgent>,
+    omicron_zones: BTreeMap<Uuid, OmicronZonesFound>,
 }
 
 impl CollectionBuilder {
@@ -101,11 +106,19 @@ impl CollectionBuilder {
             rots: BTreeMap::new(),
             cabooses_found: BTreeMap::new(),
             rot_pages_found: BTreeMap::new(),
+            sleds: BTreeMap::new(),
+            omicron_zones: BTreeMap::new(),
         }
     }
 
     /// Assemble a complete `Collection` representation
-    pub fn build(self) -> Collection {
+    pub fn build(mut self) -> Collection {
+        // This is not strictly necessary. But for testing, it's helpful for
+        // things to be in sorted order.
+        for v in self.omicron_zones.values_mut() {
+            v.zones.zones.sort_by(|a, b| a.id.cmp(&b.id));
+        }
+
         Collection {
             id: Uuid::new_v4(),
             errors: self.errors.into_iter().map(|e| e.to_string()).collect(),
@@ -119,6 +132,8 @@ impl CollectionBuilder {
             rots: self.rots,
             cabooses_found: self.cabooses_found,
             rot_pages_found: self.rot_pages_found,
+            sled_agents: self.sleds,
+            omicron_zones: self.omicron_zones,
         }
     }
 
@@ -387,6 +402,105 @@ impl CollectionBuilder {
     pub fn found_error(&mut self, error: InventoryError) {
         self.errors.push(error);
     }
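[Editorial sketch.] The two `found_*` recorders added just below share a get-then-insert pattern: a second report for the same sled id is an error rather than a silent overwrite. A self-contained miniature, with illustrative names:

```rust
use std::collections::BTreeMap;
use anyhow::anyhow;

fn record_once(
    map: &mut BTreeMap<u32, String>,
    id: u32,
    value: String,
) -> Result<(), anyhow::Error> {
    if let Some(previous) = map.get(&id) {
        // Duplicate reports indicate a bug or a confused data source, so
        // surface them instead of clobbering the earlier entry.
        Err(anyhow!("id {} reported twice (previously {:?})", id, previous))
    } else {
        map.insert(id, value);
        Ok(())
    }
}
```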
+
+    /// Record information about a sled that's part of the control plane
+    pub fn found_sled_inventory(
+        &mut self,
+        source: &str,
+        inventory: sled_agent_client::types::Inventory,
+    ) -> Result<(), anyhow::Error> {
+        let sled_id = inventory.sled_id;
+
+        // Normalize the baseboard id, if any.
+        use sled_agent_client::types::Baseboard;
+        let baseboard_id = match inventory.baseboard {
+            Baseboard::Pc { .. } => None,
+            Baseboard::Gimlet { identifier, model, revision: _ } => {
+                Some(Self::normalize_item(
+                    &mut self.baseboards,
+                    BaseboardId {
+                        serial_number: identifier,
+                        part_number: model,
+                    },
+                ))
+            }
+            Baseboard::Unknown => {
+                self.found_error(InventoryError::from(anyhow!(
+                    "sled {:?}: reported unknown baseboard",
+                    sled_id
+                )));
+                None
+            }
+        };
+
+        // Socket addresses come through the OpenAPI spec as strings, which
+        // means they don't get validated when everything else does. This
+        // error is an operational error in collecting the data, not a collector
+        // bug.
+        let sled_agent_address = match inventory.sled_agent_address.parse() {
+            Ok(addr) => addr,
+            Err(error) => {
+                self.found_error(InventoryError::from(anyhow!(
+                    "sled {:?}: bad sled agent address: {:?}: {:#}",
+                    sled_id,
+                    inventory.sled_agent_address,
+                    error,
+                )));
+                return Ok(());
+            }
+        };
+        let sled = SledAgent {
+            source: source.to_string(),
+            sled_agent_address,
+            sled_role: inventory.sled_role,
+            baseboard_id,
+            usable_hardware_threads: inventory.usable_hardware_threads,
+            usable_physical_ram: ByteCount::from(inventory.usable_physical_ram),
+            reservoir_size: ByteCount::from(inventory.reservoir_size),
+            time_collected: now(),
+            sled_id,
+        };
+
+        if let Some(previous) = self.sleds.get(&sled_id) {
+            Err(anyhow!(
+                "sled {:?}: reported sled multiple times \
+                (previously {:?}, now {:?})",
+                sled_id,
+                previous,
+                sled,
+            ))
+        } else {
+            self.sleds.insert(sled_id, sled);
+            Ok(())
+        }
+    }
+
+    /// Record information about Omicron zones found on a sled
+    pub fn found_sled_omicron_zones(
+        &mut self,
+        source: &str,
+        sled_id: Uuid,
+        zones: sled_agent_client::types::OmicronZonesConfig,
+    ) -> Result<(), anyhow::Error> {
+        if let Some(previous) = self.omicron_zones.get(&sled_id) {
+            Err(anyhow!(
+                "sled {:?} omicron zones: reported previously: {:?}",
+                sled_id,
+                previous
+            ))
+        } else {
+            self.omicron_zones.insert(
+                sled_id,
+                OmicronZonesFound {
+                    time_collected: now(),
+                    source: source.to_string(),
+                    sled_id,
+                    zones,
+                },
+            );
+            Ok(())
+        }
+    }
 }
 
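[Editorial sketch.] Note the recover-and-continue shape in `found_sled_inventory` above: a malformed address is recorded as a collection error and the method returns `Ok(())`, so one bad sled report cannot abort the whole collection. In miniature:

```rust
fn main() {
    let mut errors: Vec<String> = Vec::new();
    let reported = "not-a-socket-address";
    match reported.parse::<std::net::SocketAddrV6>() {
        Ok(addr) => println!("sled agent at {}", addr),
        Err(e) => {
            // Record the problem with the data and move on.
            errors.push(format!("bad sled agent address {:?}: {}", reported, e));
        }
    }
    assert_eq!(errors.len(), 1);
}
```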
 /// Returns the current time, truncated to the previous microsecond.
@@ -422,6 +536,8 @@ mod test {
     use nexus_types::inventory::CabooseWhich;
     use nexus_types::inventory::RotPage;
     use nexus_types::inventory::RotPageWhich;
+    use nexus_types::inventory::SledRole;
+    use omicron_common::api::external::ByteCount;
 
     // Verify the contents of an empty collection.
     #[test]
@@ -455,6 +571,8 @@ mod test {
     //   - some missing cabooses
     //   - some cabooses common to multiple baseboards; others not
     //   - serial number reused across different model numbers
+    //   - sled agent inventory
+    //   - omicron zone inventory
     //
     // This test is admittedly pretty tedious and maybe not worthwhile but it's
     // a useful quick check.
@@ -463,9 +581,11 @@ mod test {
         let time_before = now();
         let Representative {
             builder,
-            sleds: [sled1_bb, sled2_bb, sled3_bb],
+            sleds: [sled1_bb, sled2_bb, sled3_bb, sled4_bb],
             switch,
             psc,
+            sled_agents:
+                [sled_agent_id_basic, sled_agent_id_extra, sled_agent_id_pc, sled_agent_id_unknown],
         } = representative();
         let collection = builder.build();
         let time_after = now();
@@ -479,21 +599,27 @@ mod test {
         // no RoT information.
         assert_eq!(
             collection.errors.iter().map(|e| e.to_string()).collect::<Vec<_>>(),
-            ["MGS \"fake MGS 1\": reading RoT state for BaseboardId \
+            [
+                "MGS \"fake MGS 1\": reading RoT state for BaseboardId \
             { part_number: \"model1\", serial_number: \"s2\" }: test suite \
-            injected error"]
+            injected error",
+                "sled 5c5b4cf9-3e13-45fd-871c-f177d6537510: reported unknown \
+            baseboard"
+            ]
         );
 
         // Verify the baseboard ids found.
         let expected_baseboards =
-            &[&sled1_bb, &sled2_bb, &sled3_bb, &switch, &psc];
+            &[&sled1_bb, &sled2_bb, &sled3_bb, &sled4_bb, &switch, &psc];
         for bb in expected_baseboards {
             assert!(collection.baseboards.contains(*bb));
         }
         assert_eq!(collection.baseboards.len(), expected_baseboards.len());
 
         // Verify the stuff that's easy to verify for all SPs: timestamps.
-        assert_eq!(collection.sps.len(), collection.baseboards.len());
+        // There will be one more baseboard than SP because of the one added for
+        // the extra sled agent.
+        assert_eq!(collection.sps.len() + 1, collection.baseboards.len());
         for (bb, sp) in collection.sps.iter() {
             assert!(collection.time_started <= sp.time_collected);
             assert!(sp.time_collected <= collection.time_done);
@@ -755,6 +881,42 @@ mod test {
         // plus the common one; same for RoT pages.
         assert_eq!(collection.cabooses.len(), 5);
         assert_eq!(collection.rot_pages.len(), 5);
+
+        // Verify that we found the sled agents.
+        assert_eq!(collection.sled_agents.len(), 4);
+        for (sled_id, sled_agent) in &collection.sled_agents {
+            assert_eq!(*sled_id, sled_agent.sled_id);
+            if *sled_id == sled_agent_id_extra {
+                assert_eq!(sled_agent.sled_role, SledRole::Scrimlet);
+            } else {
+                assert_eq!(sled_agent.sled_role, SledRole::Gimlet);
+            }
+
+            assert_eq!(
+                sled_agent.sled_agent_address,
+                "[::1]:56792".parse().unwrap()
+            );
+            assert_eq!(sled_agent.usable_hardware_threads, 10);
+            assert_eq!(
+                sled_agent.usable_physical_ram,
+                ByteCount::from(1024 * 1024)
+            );
+            assert_eq!(sled_agent.reservoir_size, ByteCount::from(1024));
+        }
+
+        let sled1_agent = &collection.sled_agents[&sled_agent_id_basic];
+        let sled1_bb = sled1_agent.baseboard_id.as_ref().unwrap();
+        assert_eq!(sled1_bb.part_number, "model1");
+        assert_eq!(sled1_bb.serial_number, "s1");
+        let sled4_agent = &collection.sled_agents[&sled_agent_id_extra];
+        let sled4_bb = sled4_agent.baseboard_id.as_ref().unwrap();
+        assert_eq!(sled4_bb.serial_number, "s4");
+        assert!(collection.sled_agents[&sled_agent_id_pc]
+            .baseboard_id
+            .is_none());
+        assert!(collection.sled_agents[&sled_agent_id_unknown]
+            .baseboard_id
+            .is_none());
     }
 
     // Exercises all the failure cases that shouldn't happen in real systems.
diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs
index aeca6e43a1..9b335d3ee4 100644
--- a/nexus/inventory/src/collector.rs
+++ b/nexus/inventory/src/collector.rs
@@ -6,6 +6,7 @@
 
 use crate::builder::CollectionBuilder;
 use crate::builder::InventoryError;
+use crate::SledAgentEnumerator;
 use anyhow::Context;
 use gateway_client::types::GetCfpaParams;
 use gateway_client::types::RotCfpaSlot;
@@ -14,25 +15,34 @@ use nexus_types::inventory::CabooseWhich;
 use nexus_types::inventory::Collection;
 use nexus_types::inventory::RotPage;
 use nexus_types::inventory::RotPageWhich;
+use slog::o;
 use slog::{debug, error};
 use std::sync::Arc;
+use std::time::Duration;
 use strum::IntoEnumIterator;
 
-pub struct Collector {
+/// connection and request timeout used for Sled Agent HTTP client
+const SLED_AGENT_TIMEOUT: Duration = Duration::from_secs(60);
+
+/// Collect all inventory data from an Oxide system
+pub struct Collector<'a> {
     log: slog::Logger,
     mgs_clients: Vec<Arc<gateway_client::Client>>,
+    sled_agent_lister: &'a (dyn SledAgentEnumerator + Send + Sync),
     in_progress: CollectionBuilder,
 }
 
-impl Collector {
+impl<'a> Collector<'a> {
     pub fn new(
         creator: &str,
         mgs_clients: &[Arc<gateway_client::Client>],
+        sled_agent_lister: &'a (dyn SledAgentEnumerator + Send + Sync),
         log: slog::Logger,
     ) -> Self {
         Collector {
             log,
             mgs_clients: mgs_clients.to_vec(),
+            sled_agent_lister,
             in_progress: CollectionBuilder::new(creator),
         }
     }
@@ -54,9 +64,8 @@ impl Collector {
 
         debug!(&self.log, "begin collection");
 
-        // When we add stages to collect from other components (e.g., sled
-        // agents), those will go here.
self.collect_all_mgs().await; + self.collect_all_sled_agents().await; debug!(&self.log, "finished collection"); @@ -283,15 +292,94 @@ impl Collector { } } } + + /// Collect inventory from all sled agent instances + async fn collect_all_sled_agents(&mut self) { + let urls = match self.sled_agent_lister.list_sled_agents().await { + Err(error) => { + self.in_progress.found_error(error); + return; + } + Ok(clients) => clients, + }; + + for url in urls { + let log = self.log.new(o!("SledAgent" => url.clone())); + let reqwest_client = reqwest::ClientBuilder::new() + .connect_timeout(SLED_AGENT_TIMEOUT) + .timeout(SLED_AGENT_TIMEOUT) + .build() + .unwrap(); + let client = Arc::new(sled_agent_client::Client::new_with_client( + &url, + reqwest_client, + log, + )); + + if let Err(error) = self.collect_one_sled_agent(&client).await { + error!( + &self.log, + "sled agent {:?}: {:#}", + client.baseurl(), + error + ); + } + } + } + + async fn collect_one_sled_agent( + &mut self, + client: &sled_agent_client::Client, + ) -> Result<(), anyhow::Error> { + let sled_agent_url = client.baseurl(); + debug!(&self.log, "begin collection from Sled Agent"; + "sled_agent_url" => client.baseurl() + ); + + let maybe_ident = client.inventory().await.with_context(|| { + format!("Sled Agent {:?}: inventory", &sled_agent_url) + }); + let inventory = match maybe_ident { + Ok(inventory) => inventory.into_inner(), + Err(error) => { + self.in_progress.found_error(InventoryError::from(error)); + return Ok(()); + } + }; + + let sled_id = inventory.sled_id; + self.in_progress.found_sled_inventory(&sled_agent_url, inventory)?; + + let maybe_config = + client.omicron_zones_get().await.with_context(|| { + format!("Sled Agent {:?}: omicron zones", &sled_agent_url) + }); + match maybe_config { + Err(error) => { + self.in_progress.found_error(InventoryError::from(error)); + Ok(()) + } + Ok(zones) => self.in_progress.found_sled_omicron_zones( + &sled_agent_url, + sled_id, + zones.into_inner(), + ), + } + } } #[cfg(test)] mod test { use super::Collector; + use crate::StaticSledAgentEnumerator; use gateway_messages::SpPort; use nexus_types::inventory::Collection; + use omicron_sled_agent::sim; use std::fmt::Write; + use std::net::Ipv6Addr; + use std::net::SocketAddrV6; use std::sync::Arc; + use uuid::Uuid; fn dump_collection(collection: &Collection) -> String { // Construct a stable, human-readable summary of the Collection @@ -379,6 +467,35 @@ mod test { } } + write!(&mut s, "\nsled agents found:\n").unwrap(); + for (sled_id, sled_info) in &collection.sled_agents { + assert_eq!(*sled_id, sled_info.sled_id); + write!(&mut s, " sled {} ({:?})\n", sled_id, sled_info.sled_role) + .unwrap(); + write!(&mut s, " baseboard {:?}\n", sled_info.baseboard_id) + .unwrap(); + + if let Some(found_zones) = collection.omicron_zones.get(sled_id) { + assert_eq!(*sled_id, found_zones.sled_id); + write!( + &mut s, + " zone generation: {:?}\n", + found_zones.zones.generation + ) + .unwrap(); + write!(&mut s, " zones found:\n").unwrap(); + for zone in &found_zones.zones.zones { + write!( + &mut s, + " zone {} type {}\n", + zone.id, + zone.zone_type.label(), + ) + .unwrap(); + } + } + } + write!(&mut s, "\nerrors:\n").unwrap(); for e in &collection.errors { // Some error strings have OS error numbers in them. We want to @@ -402,19 +519,75 @@ mod test { s } + async fn sim_sled_agent( + log: slog::Logger, + sled_id: Uuid, + zone_id: Uuid, + ) -> sim::Server { + // Start a simulated sled agent. 
+        let config =
+            sim::Config::for_testing(sled_id, sim::SimMode::Auto, None, None);
+        let agent = sim::Server::start(&config, &log, false).await.unwrap();
+
+        // Pretend to put some zones onto this sled. We don't need to test this
+        // exhaustively here because there are builder tests that exercise a
+        // variety of different data. We just want to make sure that if the
+        // sled agent reports something specific (some non-degenerate case),
+        // then it shows up in the resulting collection.
+        let sled_url = format!("http://{}/", agent.http_server.local_addr());
+        let client = sled_agent_client::Client::new(&sled_url, log);
+
+        let zone_address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0);
+        client
+            .omicron_zones_put(&sled_agent_client::types::OmicronZonesConfig {
+                generation: sled_agent_client::types::Generation::from(3),
+                zones: vec![sled_agent_client::types::OmicronZoneConfig {
+                    id: zone_id,
+                    underlay_address: *zone_address.ip(),
+                    zone_type:
+                        sled_agent_client::types::OmicronZoneType::Oximeter {
+                            address: zone_address.to_string(),
+                        },
+                }],
+            })
+            .await
+            .expect("failed to write initial zone version to fake sled agent");
+
+        agent
+    }
+
     #[tokio::test]
     async fn test_basic() {
-        // Set up the stock MGS test setup which includes a couple of fake SPs.
-        // Then run a collection against it.
+        // Set up the stock MGS test setup (which includes a couple of fake SPs)
+        // and a simulated sled agent. Then run a collection against these.
         let gwtestctx =
             gateway_test_utils::setup::test_setup("test_basic", SpPort::One)
                 .await;
         let log = &gwtestctx.logctx.log;
+        let sled1 = sim_sled_agent(
+            log.clone(),
+            "9cb9b78f-5614-440c-b66d-e8e81fab69b0".parse().unwrap(),
+            "5125277f-0988-490b-ac01-3bba20cc8f07".parse().unwrap(),
+        )
+        .await;
+        let sled2 = sim_sled_agent(
+            log.clone(),
+            "03265caf-da7d-46c7-b1c2-39fa90ce5c65".parse().unwrap(),
+            "8b88a56f-3eb6-4d80-ba42-75d867bc427d".parse().unwrap(),
+        )
+        .await;
+        let sled1_url = format!("http://{}/", sled1.http_server.local_addr());
+        let sled2_url = format!("http://{}/", sled2.http_server.local_addr());
         let mgs_url = format!("http://{}/", gwtestctx.client.bind_address);
         let mgs_client =
             Arc::new(gateway_client::Client::new(&mgs_url, log.clone()));
-        let collector =
-            Collector::new("test-suite", &[mgs_client], log.clone());
+        let sled_enum = StaticSledAgentEnumerator::new([sled1_url, sled2_url]);
+        let collector = Collector::new(
+            "test-suite",
+            &[mgs_client],
+            &sled_enum,
+            log.clone(),
+        );
         let collection = collector
             .collect_all()
             .await
             .expect("failed to carry out collection");
@@ -425,6 +598,7 @@
         let s = dump_collection(&collection);
         expectorate::assert_contents("tests/output/collector_basic.txt", &s);

+        sled1.http_server.close().await.unwrap();
+        sled2.http_server.close().await.unwrap();
         gwtestctx.teardown().await;
     }

@@ -444,6 +618,20 @@ mod test {
         )
         .await;
         let log = &gwtestctx1.logctx.log;
+        let sled1 = sim_sled_agent(
+            log.clone(),
+            "9cb9b78f-5614-440c-b66d-e8e81fab69b0".parse().unwrap(),
+            "5125277f-0988-490b-ac01-3bba20cc8f07".parse().unwrap(),
+        )
+        .await;
+        let sled2 = sim_sled_agent(
+            log.clone(),
+            "03265caf-da7d-46c7-b1c2-39fa90ce5c65".parse().unwrap(),
+            "8b88a56f-3eb6-4d80-ba42-75d867bc427d".parse().unwrap(),
+        )
+        .await;
+        let sled1_url = format!("http://{}/", sled1.http_server.local_addr());
+        let sled2_url = format!("http://{}/", sled2.http_server.local_addr());
         let mgs_clients = [&gwtestctx1, &gwtestctx2]
             .into_iter()
             .map(|g| {
@@ -452,7 +640,9 @@ mod test {
                 Arc::new(client)
             })
             .collect::<Vec<_>>();
-        let collector = Collector::new("test-suite", &mgs_clients, log.clone());
+        let sled_enum = StaticSledAgentEnumerator::new([sled1_url, sled2_url]);
+        let collector =
+            Collector::new("test-suite", &mgs_clients, &sled_enum, log.clone());
         let collection = collector
             .collect_all()
             .await
             .expect("failed to carry out collection");
@@ -463,6 +653,7 @@
         let s = dump_collection(&collection);
         expectorate::assert_contents("tests/output/collector_basic.txt", &s);

+        sled1.http_server.close().await.unwrap();
+        sled2.http_server.close().await.unwrap();
         gwtestctx1.teardown().await;
         gwtestctx2.teardown().await;
     }

@@ -490,7 +681,9 @@
                 Arc::new(client)
             };
         let mgs_clients = &[bad_client, real_client];
-        let collector = Collector::new("test-suite", mgs_clients, log.clone());
+        let sled_enum = StaticSledAgentEnumerator::empty();
+        let collector =
+            Collector::new("test-suite", mgs_clients, &sled_enum, log.clone());
         let collection = collector
             .collect_all()
             .await
@@ -502,4 +695,50 @@
         gwtestctx.teardown().await;
     }
+
+    #[tokio::test]
+    async fn test_sled_agent_failure() {
+        // Similar to the basic test, but use multiple sled agents, one of which
+        // is non-functional.
+        let gwtestctx = gateway_test_utils::setup::test_setup(
+            "test_sled_agent_failure",
+            SpPort::One,
+        )
+        .await;
+        let log = &gwtestctx.logctx.log;
+        let sled1 = sim_sled_agent(
+            log.clone(),
+            "9cb9b78f-5614-440c-b66d-e8e81fab69b0".parse().unwrap(),
+            "5125277f-0988-490b-ac01-3bba20cc8f07".parse().unwrap(),
+        )
+        .await;
+        let sled1_url = format!("http://{}/", sled1.http_server.local_addr());
+        let sledbogus_url = String::from("http://[100::1]:45678");
+        let mgs_url = format!("http://{}/", gwtestctx.client.bind_address);
+        let mgs_client =
+            Arc::new(gateway_client::Client::new(&mgs_url, log.clone()));
+        let sled_enum =
+            StaticSledAgentEnumerator::new([sled1_url, sledbogus_url]);
+        let collector = Collector::new(
+            "test-suite",
+            &[mgs_client],
+            &sled_enum,
+            log.clone(),
+        );
+        let collection = collector
+            .collect_all()
+            .await
+            .expect("failed to carry out collection");
+        assert!(!collection.errors.is_empty());
+        assert_eq!(collection.collector, "test-suite");
+
+        let s = dump_collection(&collection);
+        expectorate::assert_contents(
+            "tests/output/collector_sled_agent_errors.txt",
+            &s,
+        );
+
+        sled1.http_server.close().await.unwrap();
+        gwtestctx.teardown().await;
+    }
 }
diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs
index 0ce3712942..054be457f3 100644
--- a/nexus/inventory/src/examples.rs
+++ b/nexus/inventory/src/examples.rs
@@ -13,10 +13,12 @@ use gateway_client::types::SpState;
 use gateway_client::types::SpType;
 use nexus_types::inventory::BaseboardId;
 use nexus_types::inventory::CabooseWhich;
+use nexus_types::inventory::OmicronZonesConfig;
 use nexus_types::inventory::RotPage;
 use nexus_types::inventory::RotPageWhich;
 use std::sync::Arc;
 use strum::IntoEnumIterator;
+use uuid::Uuid;

 /// Returns an example Collection used for testing
 ///
@@ -264,19 +266,136 @@ pub fn representative() -> Representative {

     // We deliberately provide no RoT pages for sled2.

+    // Report some sled agents.
+    //
+    // This first one will match "sled1_bb"'s baseboard information.
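Ahead of the four `found_sled_inventory` calls that follow: the cases differ only in the `Baseboard` variant the fake sled agent reports. A hedged sketch of the part-number/serial mapping that the builder test's assertions rely on (illustrative only, not the builder's actual code; variant shapes are taken from the calls below):

    use sled_agent_client::types::Baseboard;

    // Sketch: only Oxide hardware can be tied back to a baseboard
    // (part_number = model, serial_number = identifier); PCs and unknown
    // baseboards end up with no baseboard_id in the collection.
    fn baseboard_ids(b: &Baseboard) -> Option<(String, String)> {
        match b {
            Baseboard::Gimlet { identifier, model, .. } => {
                Some((model.clone(), identifier.clone()))
            }
            Baseboard::Pc { .. } | Baseboard::Unknown => None,
        }
    }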
+ let sled_agent_id_basic = + "c5aec1df-b897-49e4-8085-ccd975f9b529".parse().unwrap(); + builder + .found_sled_inventory( + "fake sled agent 1", + sled_agent( + sled_agent_id_basic, + sled_agent_client::types::Baseboard::Gimlet { + identifier: String::from("s1"), + model: String::from("model1"), + revision: 0, + }, + sled_agent_client::types::SledRole::Gimlet, + ), + ) + .unwrap(); + + // Here, we report a different sled *with* baseboard information that + // doesn't match one of the baseboards we found. This is unlikely but could + // happen. Make this one a Scrimlet. + let sled4_bb = Arc::new(BaseboardId { + part_number: String::from("model1"), + serial_number: String::from("s4"), + }); + let sled_agent_id_extra = + "d7efa9c4-833d-4354-a9a2-94ba9715c154".parse().unwrap(); + builder + .found_sled_inventory( + "fake sled agent 4", + sled_agent( + sled_agent_id_extra, + sled_agent_client::types::Baseboard::Gimlet { + identifier: sled4_bb.serial_number.clone(), + model: sled4_bb.part_number.clone(), + revision: 0, + }, + sled_agent_client::types::SledRole::Scrimlet, + ), + ) + .unwrap(); + + // Now report a different sled as though it were a PC. It'd be unlikely to + // see a mix of real Oxide hardware and PCs in the same deployment, but this + // exercises different code paths. + let sled_agent_id_pc = + "c4a5325b-e852-4747-b28a-8aaa7eded8a0".parse().unwrap(); + builder + .found_sled_inventory( + "fake sled agent 5", + sled_agent( + sled_agent_id_pc, + sled_agent_client::types::Baseboard::Pc { + identifier: String::from("fellofftruck1"), + model: String::from("fellofftruck"), + }, + sled_agent_client::types::SledRole::Gimlet, + ), + ) + .unwrap(); + + // Finally, report a sled with unknown baseboard information. This should + // look the same as the PC as far as inventory is concerned but let's verify + // it. + let sled_agent_id_unknown = + "5c5b4cf9-3e13-45fd-871c-f177d6537510".parse().unwrap(); + + builder + .found_sled_inventory( + "fake sled agent 6", + sled_agent( + sled_agent_id_unknown, + sled_agent_client::types::Baseboard::Unknown, + sled_agent_client::types::SledRole::Gimlet, + ), + ) + .unwrap(); + + // Report a representative set of Omicron zones. + // + // We've hand-selected a minimal set of files to cover each type of zone. 
+    // These files were constructed by:
+    //
+    // (1) copying the "omicron zones" ledgers from the sleds in a working
+    //     Omicron deployment
+    // (2) pretty-printing each one with `json --in-place --file FILENAME`
+    // (3) adjusting the format slightly with
+    //     `jq '{ generation: .omicron_generation, zones: .zones }'`
+    let sled14_data = include_str!("../example-data/madrid-sled14.json");
+    let sled16_data = include_str!("../example-data/madrid-sled16.json");
+    let sled17_data = include_str!("../example-data/madrid-sled17.json");
+    let sled14: OmicronZonesConfig = serde_json::from_str(sled14_data).unwrap();
+    let sled16: OmicronZonesConfig = serde_json::from_str(sled16_data).unwrap();
+    let sled17: OmicronZonesConfig = serde_json::from_str(sled17_data).unwrap();
+
+    let sled14_id = "7612d745-d978-41c8-8ee0-84564debe1d2".parse().unwrap();
+    builder
+        .found_sled_omicron_zones("fake sled 14 agent", sled14_id, sled14)
+        .unwrap();
+    let sled16_id = "af56cb43-3422-4f76-85bf-3f229db5f39c".parse().unwrap();
+    builder
+        .found_sled_omicron_zones("fake sled 16 agent", sled16_id, sled16)
+        .unwrap();
+    let sled17_id = "6eb2a0d9-285d-4e03-afa1-090e4656314b".parse().unwrap();
+    builder
+        .found_sled_omicron_zones("fake sled 17 agent", sled17_id, sled17)
+        .unwrap();
+
     Representative {
         builder,
-        sleds: [sled1_bb, sled2_bb, sled3_bb],
+        sleds: [sled1_bb, sled2_bb, sled3_bb, sled4_bb],
         switch: switch1_bb,
         psc: psc_bb,
+        sled_agents: [
+            sled_agent_id_basic,
+            sled_agent_id_extra,
+            sled_agent_id_pc,
+            sled_agent_id_unknown,
+        ],
     }
 }

 pub struct Representative {
     pub builder: CollectionBuilder,
-    pub sleds: [Arc<BaseboardId>; 3],
+    pub sleds: [Arc<BaseboardId>; 4],
     pub switch: Arc<BaseboardId>,
     pub psc: Arc<BaseboardId>,
+    pub sled_agents: [Uuid; 4],
 }

 /// Returns an SP state that can be used to populate a collection for testing
@@ -314,3 +433,21 @@ pub fn rot_page(unique: &str) -> RotPage {
         data_base64: base64::engine::general_purpose::STANDARD.encode(unique),
     }
 }
+
+pub fn sled_agent(
+    sled_id: Uuid,
+    baseboard: sled_agent_client::types::Baseboard,
+    sled_role: sled_agent_client::types::SledRole,
+) -> sled_agent_client::types::Inventory {
+    sled_agent_client::types::Inventory {
+        baseboard,
+        reservoir_size: sled_agent_client::types::ByteCount::from(1024),
+        sled_role,
+        sled_agent_address: "[::1]:56792".parse().unwrap(),
+        sled_id,
+        usable_hardware_threads: 10,
+        usable_physical_ram: sled_agent_client::types::ByteCount::from(
+            1024 * 1024,
+        ),
+    }
+}
diff --git a/nexus/inventory/src/lib.rs b/nexus/inventory/src/lib.rs
index e92c46916d..f11af8fede 100644
--- a/nexus/inventory/src/lib.rs
+++ b/nexus/inventory/src/lib.rs
@@ -20,6 +20,7 @@
 mod builder;
 mod collector;
 pub mod examples;
+mod sled_agent_enumerator;

 // only exposed for test code to construct collections
 pub use builder::CollectionBuilder;
@@ -27,3 +28,6 @@
 pub use builder::CollectorBug;
 pub use builder::InventoryError;
 pub use collector::Collector;
+
+pub use sled_agent_enumerator::SledAgentEnumerator;
+pub use sled_agent_enumerator::StaticSledAgentEnumerator;
diff --git a/nexus/inventory/src/sled_agent_enumerator.rs b/nexus/inventory/src/sled_agent_enumerator.rs
new file mode 100644
index 0000000000..8a1b480e3f
--- /dev/null
+++ b/nexus/inventory/src/sled_agent_enumerator.rs
@@ -0,0 +1,44 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
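Before the body of this new file, a hedged usage sketch of the trait it defines (the function is illustrative and not part of the patch; it assumes only `SledAgentEnumerator` and `InventoryError` as defined here):

    // Illustrative caller: anything that can enumerate sled agent URLs
    // can drive a collection.
    async fn print_sled_agents(
        enumerator: &(dyn SledAgentEnumerator + Send + Sync),
    ) -> Result<(), InventoryError> {
        for url in enumerator.list_sled_agents().await? {
            println!("would collect from sled agent at {}", url);
        }
        Ok(())
    }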
+
+use crate::InventoryError;
+use futures::future::BoxFuture;
+use futures::FutureExt;
+
+/// Describes how to find the list of sled agents to collect from
+///
+/// In a real system, this queries the database to list all sleds. But for
+/// testing, the `StaticSledAgentEnumerator` below can be used to avoid a
+/// database dependency.
+pub trait SledAgentEnumerator {
+    /// Returns a list of URLs for Sled Agent HTTP endpoints
+    fn list_sled_agents(
+        &self,
+    ) -> BoxFuture<'_, Result<Vec<String>, InventoryError>>;
+}
+
+/// Used to provide an explicit list of sled agents to a `Collector`
+///
+/// This is mainly used for testing.
+pub struct StaticSledAgentEnumerator {
+    agents: Vec<String>,
+}
+
+impl StaticSledAgentEnumerator {
+    pub fn new(iter: impl IntoIterator<Item = String>) -> Self {
+        StaticSledAgentEnumerator { agents: iter.into_iter().collect() }
+    }
+
+    pub fn empty() -> Self {
+        Self::new(std::iter::empty())
+    }
+}
+
+impl SledAgentEnumerator for StaticSledAgentEnumerator {
+    fn list_sled_agents(
+        &self,
+    ) -> BoxFuture<'_, Result<Vec<String>, InventoryError>> {
+        futures::future::ready(Ok(self.agents.clone())).boxed()
+    }
+}
diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt
index b9894ff184..e59e19967a 100644
--- a/nexus/inventory/tests/output/collector_basic.txt
+++ b/nexus/inventory/tests/output/collector_basic.txt
@@ -3,6 +3,8 @@ baseboards:
 part "FAKE_SIM_GIMLET" serial "SimGimlet01"
 part "FAKE_SIM_SIDECAR" serial "SimSidecar0"
 part "FAKE_SIM_SIDECAR" serial "SimSidecar1"
+ part "sim-gimlet" serial "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65"
+ part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0"

 cabooses:
 board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff"
@@ -68,4 +70,16 @@ rot pages found:
 CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
 CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="

+sled agents found:
+ sled 03265caf-da7d-46c7-b1c2-39fa90ce5c65 (Gimlet)
+ baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" })
+ zone generation: Generation(3)
+ zones found: + zone 8b88a56f-3eb6-4d80-ba42-75d867bc427d type oximeter + sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Gimlet) + baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) + zone generation: Generation(3) + zones found: + zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter + errors: diff --git a/nexus/inventory/tests/output/collector_errors.txt b/nexus/inventory/tests/output/collector_errors.txt index a50e24ca30..c39d6b249a 100644 --- a/nexus/inventory/tests/output/collector_errors.txt +++ b/nexus/inventory/tests/output/collector_errors.txt @@ -68,5 +68,7 @@ rot pages found: CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" +sled agents found: + errors: error: MGS "http://[100::1]:12345": listing ignition targets: Communication Error <> diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt new file mode 100644 index 0000000000..9ebf2cece9 --- /dev/null +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -0,0 +1,80 @@ +baseboards: + part "FAKE_SIM_GIMLET" serial "SimGimlet00" + part "FAKE_SIM_GIMLET" serial "SimGimlet01" + part "FAKE_SIM_SIDECAR" serial "SimSidecar0" + part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" + +cabooses: + board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff" + board "SimRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" + board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" + board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" + +rot pages: + data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 
"c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + +SPs: + baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" + baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" + baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" + baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + +RoTs: + baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" + baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" + baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" + baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + +cabooses found: + SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" + SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" + SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" + SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" + SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" + SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" + SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" + SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" + RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" + RotSlotA baseboard part "FAKE_SIM_SIDECAR" 
serial "SimSidecar1": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" + RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" + +rot pages found: + Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + +sled agents found: + sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Gimlet) + baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) + zone generation: Generation(3) + zones found: + zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter + +errors: +error: Sled Agent "http://[100::1]:45678": inventory: Communication Error <> diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index f095b094db..5c52fa519b 100644 --- 
a/nexus/src/app/background/inventory_collection.rs
+++ b/nexus/src/app/background/inventory_collection.rs
@@ -11,11 +11,18 @@
 use futures::future::BoxFuture;
 use futures::FutureExt;
 use internal_dns::ServiceName;
 use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::pagination::Paginator;
 use nexus_db_queries::db::DataStore;
+use nexus_inventory::InventoryError;
+use nexus_types::identity::Asset;
 use nexus_types::inventory::Collection;
 use serde_json::json;
+use std::num::NonZeroU32;
 use std::sync::Arc;

+/// How many rows to request in each paginated database query
+const DB_PAGE_SIZE: u32 = 1024;
+
 /// Background task that reads inventory for the rack
 pub struct InventoryCollector {
     datastore: Arc<DataStore>,
@@ -123,10 +130,15 @@
         })
         .collect::<Vec<_>>();

+    // Create an enumerator to find sled agents.
+    let page_size = NonZeroU32::new(DB_PAGE_SIZE).unwrap();
+    let sled_enum = DbSledAgentEnumerator { opctx, datastore, page_size };
+
     // Run a collection.
     let inventory = nexus_inventory::Collector::new(
         creator,
         &mgs_clients,
+        &sled_enum,
         opctx.log.clone(),
     );
     let collection =
@@ -141,14 +153,64 @@
     Ok(collection)
 }

+/// Determine which sleds to inventory based on what's in the database
+///
+/// We only want to inventory what's actually part of the control plane (i.e.,
+/// has a "sled" record).
+struct DbSledAgentEnumerator<'a> {
+    opctx: &'a OpContext,
+    datastore: &'a DataStore,
+    page_size: NonZeroU32,
+}
+
+impl<'a> nexus_inventory::SledAgentEnumerator for DbSledAgentEnumerator<'a> {
+    fn list_sled_agents(
+        &self,
+    ) -> BoxFuture<'_, Result<Vec<String>, InventoryError>> {
+        async {
+            let mut all_sleds = Vec::new();
+            let mut paginator = Paginator::new(self.page_size);
+            while let Some(p) = paginator.next() {
+                let records_batch = self
+                    .datastore
+                    .sled_list(&self.opctx, &p.current_pagparams())
+                    .await
+                    .context("listing sleds")?;
+                paginator = p.found_batch(
+                    &records_batch,
+                    &|s: &nexus_db_model::Sled| s.id(),
+                );
+                all_sleds.extend(
+                    records_batch
+                        .into_iter()
+                        .map(|sled| format!("http://{}", sled.address())),
+                );
+            }
+
+            Ok(all_sleds)
+        }
+        .boxed()
+    }
+}
+
 #[cfg(test)]
 mod test {
     use crate::app::background::common::BackgroundTask;
+    use crate::app::background::inventory_collection::DbSledAgentEnumerator;
     use crate::app::background::inventory_collection::InventoryCollector;
+    use nexus_db_model::SledBaseboard;
+    use nexus_db_model::SledSystemHardware;
+    use nexus_db_model::SledUpdate;
     use nexus_db_queries::context::OpContext;
     use nexus_db_queries::db::datastore::DataStoreInventoryTest;
+    use nexus_inventory::SledAgentEnumerator;
     use nexus_test_utils_macros::nexus_test;
+    use omicron_common::api::external::ByteCount;
     use omicron_test_utils::dev::poll;
+    use std::net::Ipv6Addr;
+    use std::net::SocketAddrV6;
+    use std::num::NonZeroU32;
+    use uuid::Uuid;

     type ControlPlaneTestContext =
         nexus_test_utils::ControlPlaneTestContext<crate::Server>;
@@ -240,4 +302,80 @@
         let latest = datastore.inventory_collections().await.unwrap();
         assert_eq!(previous, latest);
     }
+
+    #[nexus_test(server = crate::Server)]
+    async fn test_db_sled_enumerator(cptestctx: &ControlPlaneTestContext) {
+        let nexus = &cptestctx.server.apictx().nexus;
+        let datastore = nexus.datastore();
+        let opctx = OpContext::for_tests(
+            cptestctx.logctx.log.clone(),
+            datastore.clone(),
+        );
+        let db_enum = DbSledAgentEnumerator {
+            opctx: &opctx,
+            datastore: &datastore,
+            page_size: NonZeroU32::new(3).unwrap(),
+        };
+
+        // There will be one sled agent set up as part of the
test context. + let found_urls = db_enum.list_sled_agents().await.unwrap(); + assert_eq!(found_urls.len(), 1); + + // Insert some sleds. + let rack_id = Uuid::new_v4(); + let mut sleds = Vec::new(); + for i in 0..64 { + let sled = SledUpdate::new( + Uuid::new_v4(), + SocketAddrV6::new(Ipv6Addr::LOCALHOST, 1200 + i, 0, 0), + SledBaseboard { + serial_number: format!("serial-{}", i), + part_number: String::from("fake-sled"), + revision: 3, + }, + SledSystemHardware { + is_scrimlet: false, + usable_hardware_threads: 12, + usable_physical_ram: ByteCount::from_gibibytes_u32(16) + .into(), + reservoir_size: ByteCount::from_gibibytes_u32(8).into(), + }, + rack_id, + ); + sleds.push(datastore.sled_upsert(sled).await.unwrap()); + } + + // The same enumerator should immediately find all the new sleds. + let mut expected_urls: Vec<_> = found_urls + .into_iter() + .chain(sleds.into_iter().map(|s| format!("http://{}", s.address()))) + .collect(); + expected_urls.sort(); + println!("expected_urls: {:?}", expected_urls); + + let mut found_urls = db_enum.list_sled_agents().await.unwrap(); + found_urls.sort(); + assert_eq!(expected_urls, found_urls); + + // We should get the same result even with a page size of 1. + let db_enum = DbSledAgentEnumerator { + opctx: &opctx, + datastore: &datastore, + page_size: NonZeroU32::new(1).unwrap(), + }; + let mut found_urls = db_enum.list_sled_agents().await.unwrap(); + found_urls.sort(); + assert_eq!(expected_urls, found_urls); + + // We should get the same result even with a page size much larger than + // we need. + let db_enum = DbSledAgentEnumerator { + opctx: &opctx, + datastore: &datastore, + page_size: NonZeroU32::new(1024).unwrap(), + }; + let mut found_urls = db_enum.list_sled_agents().await.unwrap(); + found_urls.sort(); + assert_eq!(expected_urls, found_urls); + } } diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 44efc2934e..943490ac04 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -96,9 +96,9 @@ impl super::Nexus { // but for now, connections to sled agents are constructed // on an "as requested" basis. // - // Franky, returning an "Arc" here without a connection pool is a little - // silly; it's not actually used if each client connection exists as a - // one-shot. + // Frankly, returning an "Arc" here without a connection pool is a + // little silly; it's not actually used if each client connection exists + // as a one-shot. let (.., sled) = self.sled_lookup(&self.opctx_alloc, id)?.fetch().await?; diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 52ff8910f9..d2ac0405fc 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -51,6 +51,9 @@ use trust_dns_resolver::config::ResolverOpts; use trust_dns_resolver::TokioAsyncResolver; use uuid::Uuid; +pub use sim::TEST_HARDWARE_THREADS; +pub use sim::TEST_RESERVOIR_RAM; + pub mod db; pub mod http_testing; pub mod resource_helpers; @@ -62,13 +65,6 @@ pub const OXIMETER_UUID: &str = "39e6175b-4df2-4730-b11d-cbc1e60a2e78"; pub const PRODUCER_UUID: &str = "a6458b7d-87c3-4483-be96-854d814c20de"; pub const RACK_SUBNET: &str = "fd00:1122:3344:01::/56"; -/// The reported amount of hardware threads for an emulated sled agent. -pub const TEST_HARDWARE_THREADS: u32 = 16; -/// The reported amount of physical RAM for an emulated sled agent. -pub const TEST_PHYSICAL_RAM: u64 = 32 * (1 << 30); -/// The reported amount of VMM reservoir RAM for an emulated sled agent. 
-pub const TEST_RESERVOIR_RAM: u64 = 16 * (1 << 30);
-
 /// Password for the user created by the test suite
 ///
 /// This is only used by the test suite and `omicron-dev run-all` (the latter of
@@ -994,32 +990,15 @@ pub async fn start_sled_agent(
     update_directory: &Utf8Path,
     sim_mode: sim::SimMode,
 ) -> Result<sim::Server, String> {
-    let config = sim::Config {
+    let config = sim::Config::for_testing(
         id,
         sim_mode,
-        nexus_address,
-        dropshot: ConfigDropshot {
-            bind_address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0),
-            request_body_max_bytes: 1024 * 1024,
-            default_handler_task_mode: HandlerTaskMode::Detached,
-        },
-        // TODO-cleanup this is unused
-        log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Debug },
-        storage: sim::ConfigStorage {
-            zpools: vec![],
-            ip: IpAddr::from(Ipv6Addr::LOCALHOST),
-        },
-        updates: sim::ConfigUpdates {
-            zone_artifact_path: update_directory.to_path_buf(),
-        },
-        hardware: sim::ConfigHardware {
-            hardware_threads: TEST_HARDWARE_THREADS,
-            physical_ram: TEST_PHYSICAL_RAM,
-            reservoir_ram: TEST_RESERVOIR_RAM,
-        },
-    };
-    let server =
-        sim::Server::start(&config, &log).await.map_err(|e| e.to_string())?;
+        Some(nexus_address),
+        Some(update_directory),
+    );
+    let server = sim::Server::start(&config, &log, true)
+        .await
+        .map_err(|e| e.to_string())?;
     Ok(server)
 }
diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml
index 9cb94a8484..90ec67c0e6 100644
--- a/nexus/types/Cargo.toml
+++ b/nexus/types/Cargo.toml
@@ -24,3 +24,4 @@ gateway-client.workspace = true
 omicron-common.workspace = true
 omicron-passwords.workspace = true
 omicron-workspace-hack.workspace = true
+sled-agent-client.workspace = true
diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs
index 77bc73306d..b27d7277ba 100644
--- a/nexus/types/src/inventory.rs
+++ b/nexus/types/src/inventory.rs
@@ -9,20 +9,31 @@
 //! nexus/inventory does not currently know about nexus/db-model and it's
 //! convenient to separate these concerns.)

+use crate::external_api::params::UninitializedSledId;
+use crate::external_api::shared::Baseboard;
 use chrono::DateTime;
 use chrono::Utc;
 pub use gateway_client::types::PowerState;
 pub use gateway_client::types::RotSlot;
 pub use gateway_client::types::SpType;
+use omicron_common::api::external::ByteCount;
+pub use sled_agent_client::types::NetworkInterface;
+pub use sled_agent_client::types::NetworkInterfaceKind;
+pub use sled_agent_client::types::OmicronZoneConfig;
+pub use sled_agent_client::types::OmicronZoneDataset;
+pub use sled_agent_client::types::OmicronZoneType;
+pub use sled_agent_client::types::OmicronZonesConfig;
+pub use sled_agent_client::types::SledRole;
+pub use sled_agent_client::types::SourceNatConfig;
+pub use sled_agent_client::types::Vni;
+pub use sled_agent_client::types::ZpoolName;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
+use std::net::SocketAddrV6;
 use std::sync::Arc;
 use strum::EnumIter;
 use uuid::Uuid;

-use crate::external_api::params::UninitializedSledId;
-use crate::external_api::shared::Baseboard;
-
 /// Results of collecting hardware/software inventory from various Omicron
 /// components
 ///
@@ -89,6 +100,12 @@ pub struct Collection {
     /// table.
     pub rot_pages_found:
         BTreeMap<RotPageWhich, BTreeMap<Arc<BaseboardId>, RotPageFound>>,
+
+    /// Sled Agent information, by *sled* id
+    pub sled_agents: BTreeMap<Uuid, SledAgent>,
+
+    /// Omicron zones found, by *sled* id
+    pub omicron_zones: BTreeMap<Uuid, OmicronZonesFound>,
 }

 impl Collection {
@@ -269,3 +286,30 @@ impl IntoRotPage for gateway_client::types::RotCfpa {
         (which, RotPage { data_base64: self.base64_data })
     }
 }
+
+/// Inventory reported by sled agent
+///
+/// This is a software notion of a sled, distinct from an underlying baseboard.
+/// A sled may be on a PC (in dev/test environments) and have no associated
+/// baseboard. There might also be baseboards with no associated sled (if
+/// they have not been formally added to the control plane).
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct SledAgent {
+    pub time_collected: DateTime<Utc>,
+    pub source: String,
+    pub sled_id: Uuid,
+    pub baseboard_id: Option<Arc<BaseboardId>>,
+    pub sled_agent_address: SocketAddrV6,
+    pub sled_role: SledRole,
+    pub usable_hardware_threads: u32,
+    pub usable_physical_ram: ByteCount,
+    pub reservoir_size: ByteCount,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct OmicronZonesFound {
+    pub time_collected: DateTime<Utc>,
+    pub source: String,
+    pub sled_id: Uuid,
+    pub zones: OmicronZonesConfig,
+}
diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json
index 467fd32cb8..b5b9d3fd5b 100644
--- a/openapi/sled-agent.json
+++ b/openapi/sled-agent.json
@@ -415,6 +415,30 @@
       }
     }
   },
+    "/inventory": {
+      "get": {
+        "summary": "Fetch basic information about this sled",
+        "operationId": "inventory",
+        "responses": {
+          "200": {
+            "description": "successful operation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Inventory"
+                }
+              }
+            }
+          },
+          "4XX": {
+            "$ref": "#/components/responses/Error"
+          },
+          "5XX": {
+            "$ref": "#/components/responses/Error"
+          }
+        }
+      }
+    },
     "/metrics/collect/{producer_id}": {
       "get": {
         "summary": "Collect oximeter samples from the sled agent.",
@@ -4916,6 +4940,45 @@
           }
         }
       },
+      "Inventory": {
+        "description": "Identity and basic status information about this sled agent",
+        "type": "object",
+        "properties": {
+          "baseboard": {
+            "$ref": "#/components/schemas/Baseboard"
+          },
+          "reservoir_size": {
+            "$ref": "#/components/schemas/ByteCount"
+          },
+          "sled_agent_address": {
+            "type": "string"
+          },
+          "sled_id": {
+            "type": "string",
+            "format": "uuid"
+          },
+          "sled_role": {
+            "$ref": "#/components/schemas/SledRole"
+          },
+          "usable_hardware_threads": {
+            "type": "integer",
+            "format": "uint32",
+            "minimum": 0
+          },
+          "usable_physical_ram": {
+            "$ref": "#/components/schemas/ByteCount"
+          }
+        },
+        "required": [
+          "baseboard",
+          "reservoir_size",
+          "sled_agent_address",
+          "sled_id",
+          "sled_role",
+          "usable_hardware_threads",
+          "usable_physical_ram"
+        ]
+      },
       "IpNet": {
         "oneOf": [
           {
@@ -6154,6 +6217,7 @@
         ]
       },
       "SledRole": {
+        "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.",
         "oneOf": [
           {
             "description": "The sled is a general compute sled.",
diff --git a/schema/crdb/22.0.0/up01.sql b/schema/crdb/22.0.0/up01.sql
new file mode 100644
index 0000000000..2e7699d24b
--- /dev/null
+++ b/schema/crdb/22.0.0/up01.sql
@@ -0,0 +1,4 @@
+CREATE TYPE IF NOT EXISTS omicron.public.sled_role AS ENUM (
+    'scrimlet',
+    'gimlet'
+);
diff --git a/schema/crdb/22.0.0/up02.sql b/schema/crdb/22.0.0/up02.sql
new file mode 100644
index 0000000000..8f8ddea015
--- /dev/null
+++ b/schema/crdb/22.0.0/up02.sql
@@ -0,0 +1,19 @@
+CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent (
inv_collection_id UUID NOT NULL, + time_collected TIMESTAMPTZ NOT NULL, + source TEXT NOT NULL, + + sled_id UUID NOT NULL, + + hw_baseboard_id UUID, + + sled_agent_ip INET NOT NULL, + sled_agent_port INT4 NOT NULL, + sled_role omicron.public.sled_role NOT NULL, + usable_hardware_threads INT8 + CHECK (usable_hardware_threads BETWEEN 0 AND 4294967295) NOT NULL, + usable_physical_ram INT8 NOT NULL, + reservoir_size INT8 CHECK (reservoir_size < usable_physical_ram) NOT NULL, + + PRIMARY KEY (inv_collection_id, sled_id) +); diff --git a/schema/crdb/22.0.0/up03.sql b/schema/crdb/22.0.0/up03.sql new file mode 100644 index 0000000000..b741141b2b --- /dev/null +++ b/schema/crdb/22.0.0/up03.sql @@ -0,0 +1,11 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_omicron_zones ( + inv_collection_id UUID NOT NULL, + time_collected TIMESTAMPTZ NOT NULL, + source TEXT NOT NULL, + + sled_id UUID NOT NULL, + + generation INT8 NOT NULL, + + PRIMARY KEY (inv_collection_id, sled_id) +); diff --git a/schema/crdb/22.0.0/up04.sql b/schema/crdb/22.0.0/up04.sql new file mode 100644 index 0000000000..74620e9685 --- /dev/null +++ b/schema/crdb/22.0.0/up04.sql @@ -0,0 +1,13 @@ +CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM ( + 'boundary_ntp', + 'clickhouse', + 'clickhouse_keeper', + 'cockroach_db', + 'crucible', + 'crucible_pantry', + 'external_dns', + 'internal_dns', + 'internal_ntp', + 'nexus', + 'oximeter' +); diff --git a/schema/crdb/22.0.0/up05.sql b/schema/crdb/22.0.0/up05.sql new file mode 100644 index 0000000000..11d8684854 --- /dev/null +++ b/schema/crdb/22.0.0/up05.sql @@ -0,0 +1,41 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( + inv_collection_id UUID NOT NULL, + + sled_id UUID NOT NULL, + + id UUID NOT NULL, + underlay_address INET NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + + dataset_zpool_name TEXT, + + nic_id UUID, + + dns_gz_address INET, + dns_gz_address_index INT8, + + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (inv_collection_id, id) +); diff --git a/schema/crdb/22.0.0/up06.sql b/schema/crdb/22.0.0/up06.sql new file mode 100644 index 0000000000..3d50bcfefd --- /dev/null +++ b/schema/crdb/22.0.0/up06.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( + inv_collection_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (inv_collection_id, id) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index cc61148048..57ce791a03 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2916,6 +2916,161 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_root_of_trust_page ( PRIMARY KEY (inv_collection_id, hw_baseboard_id, which) ); +CREATE TYPE IF NOT EXISTS omicron.public.sled_role AS ENUM ( + -- this sled is directly attached to a Sidecar + 'scrimlet', + -- 
everything else
+    'gimlet'
+);
+
+-- observations from and about sled agents
+CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent (
+    -- where this observation came from
+    -- (foreign key into `inv_collection` table)
+    inv_collection_id UUID NOT NULL,
+    -- when this observation was made
+    time_collected TIMESTAMPTZ NOT NULL,
+    -- URL of the sled agent that reported this data
+    source TEXT NOT NULL,
+
+    -- unique id for this sled (should be a foreign key into `sled` table, though
+    -- it's conceivable a sled will report an id that we don't know about)
+    sled_id UUID NOT NULL,
+
+    -- which system this sled agent reports it's running on
+    -- (foreign key into `hw_baseboard_id` table)
+    -- This is optional because dev/test systems support running on non-Oxide
+    -- hardware.
+    hw_baseboard_id UUID,
+
+    -- Many of the following properties are duplicated from the `sled` table,
+    -- which predates the current inventory system.
+    sled_agent_ip INET NOT NULL,
+    sled_agent_port INT4 NOT NULL,
+    sled_role omicron.public.sled_role NOT NULL,
+    usable_hardware_threads INT8
+        CHECK (usable_hardware_threads BETWEEN 0 AND 4294967295) NOT NULL,
+    usable_physical_ram INT8 NOT NULL,
+    reservoir_size INT8 CHECK (reservoir_size < usable_physical_ram) NOT NULL,
+
+    PRIMARY KEY (inv_collection_id, sled_id)
+);
+
+CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_omicron_zones (
+    -- where this observation came from
+    -- (foreign key into `inv_collection` table)
+    inv_collection_id UUID NOT NULL,
+    -- when this observation was made
+    time_collected TIMESTAMPTZ NOT NULL,
+    -- URL of the sled agent that reported this data
+    source TEXT NOT NULL,
+
+    -- unique id for this sled (should be a foreign key into `sled` table, though
+    -- it's conceivable a sled will report an id that we don't know about)
+    sled_id UUID NOT NULL,
+
+    -- OmicronZonesConfig generation reporting these zones
+    generation INT8 NOT NULL,
+
+    PRIMARY KEY (inv_collection_id, sled_id)
+);
+
+CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM (
+    'boundary_ntp',
+    'clickhouse',
+    'clickhouse_keeper',
+    'cockroach_db',
+    'crucible',
+    'crucible_pantry',
+    'external_dns',
+    'internal_dns',
+    'internal_ntp',
+    'nexus',
+    'oximeter'
+);
+
+-- observations from sled agents about Omicron-managed zones
+CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone (
+    -- where this observation came from
+    -- (foreign key into `inv_collection` table)
+    inv_collection_id UUID NOT NULL,
+
+    -- unique id for this sled (should be a foreign key into `sled` table, though
+    -- it's conceivable a sled will report an id that we don't know about)
+    sled_id UUID NOT NULL,
+
+    -- unique id for this zone
+    id UUID NOT NULL,
+    underlay_address INET NOT NULL,
+    zone_type omicron.public.zone_type NOT NULL,
+
+    -- SocketAddr of the "primary" service for this zone
+    -- (what this describes varies by zone type, but all zones have at least one
+    -- service in them)
+    primary_service_ip INET NOT NULL,
+    primary_service_port INT4
+        CHECK (primary_service_port BETWEEN 0 AND 65535)
+        NOT NULL,
+
+    -- The remaining properties may be NULL for different kinds of zones. The
+    -- specific constraints are not enforced at the database layer, basically
+    -- because it's really complicated to do that and it's not obvious that it's
+    -- worthwhile.
+
+    -- Some zones have a second service. Like the primary one, the meaning of
+    -- this is zone-type-dependent.
+ second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + + -- Zones may have an associated dataset. They're currently always on a U.2. + -- The only thing we need to identify it here is the name of the zpool that + -- it's on. + dataset_zpool_name TEXT, + + -- Zones with external IPs have an associated NIC and sockaddr for listening + -- (first is a foreign key into `inv_omicron_zone_nic`) + nic_id UUID, + + -- Properties for internal DNS servers + -- address attached to this zone from outside the sled's subnet + dns_gz_address INET, + dns_gz_address_index INT8, + + -- Properties common to both kinds of NTP zones + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + + -- Properties specific to Nexus zones + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + + -- Source NAT configuration (currently used for boundary NTP only) + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (inv_collection_id, id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( + inv_collection_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (inv_collection_id, id) +); + /*******************************************************************/ /* @@ -3096,7 +3251,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '21.0.0', NULL) + ( TRUE, NOW(), NOW(), '22.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index ee0ebda71e..4b3bc9e432 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -12,15 +12,15 @@ use clap::Parser; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; -use dropshot::HandlerTaskMode; use nexus_client::types as NexusTypes; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::sim::RssArgs; use omicron_sled_agent::sim::{ - run_standalone_server, Config, ConfigHardware, ConfigStorage, - ConfigUpdates, ConfigZpool, SimMode, + run_standalone_server, Config, ConfigHardware, ConfigStorage, ConfigZpool, + SimMode, }; +use sled_hardware::Baseboard; use std::net::SocketAddr; use std::net::SocketAddrV6; use uuid::Uuid; @@ -98,26 +98,31 @@ async fn do_run() -> Result<(), CmdError> { let tmp = camino_tempfile::tempdir() .map_err(|e| CmdError::Failure(anyhow!(e)))?; let config = Config { - id: args.uuid, - sim_mode: args.sim_mode, - nexus_address: args.nexus_addr, dropshot: ConfigDropshot { bind_address: args.sled_agent_addr.into(), - request_body_max_bytes: 1024 * 1024, - default_handler_task_mode: HandlerTaskMode::Detached, + ..Default::default() }, - log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }, storage: ConfigStorage { // Create 10 "virtual" U.2s, with 1 TB of storage. 
zpools: vec![ConfigZpool { size: 1 << 40 }; 10], ip: (*args.sled_agent_addr.ip()).into(), }, - updates: ConfigUpdates { zone_artifact_path: tmp.path().to_path_buf() }, hardware: ConfigHardware { hardware_threads: 32, physical_ram: 64 * (1 << 30), reservoir_ram: 32 * (1 << 30), + baseboard: Baseboard::Gimlet { + identifier: format!("sim-{}", args.uuid), + model: String::from("sim-gimlet"), + revision: 3, + }, }, + ..Config::for_testing( + args.uuid, + args.sim_mode, + Some(args.nexus_addr), + Some(tmp.path()), + ) }; let tls_certificate = match (args.rss_tls_cert, args.rss_tls_key) { @@ -145,5 +150,9 @@ async fn do_run() -> Result<(), CmdError> { tls_certificate, }; - run_standalone_server(&config, &rss_args).await.map_err(CmdError::Failure) + let config_logging = + ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }; + run_standalone_server(&config, &config_logging, &rss_args) + .await + .map_err(CmdError::Failure) } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 26a0d2ddc2..39d1ae26a0 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -10,9 +10,9 @@ use crate::bootstrap::params::AddSledRequest; use crate::params::{ CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody, InstancePutStateBody, - InstancePutStateResponse, InstanceUnregisterResponse, OmicronZonesConfig, - SledRole, TimeSync, VpcFirewallRulesEnsureBody, ZoneBundleId, - ZoneBundleMetadata, Zpool, + InstancePutStateResponse, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRulesEnsureBody, + ZoneBundleId, ZoneBundleMetadata, Zpool, }; use crate::sled_agent::Error as SledAgentError; use crate::zone_bundle; @@ -82,6 +82,7 @@ pub fn api() -> SledApiDescription { api.register(host_os_write_start)?; api.register(host_os_write_status_get)?; api.register(host_os_write_status_delete)?; + api.register(inventory)?; Ok(()) } @@ -925,3 +926,15 @@ async fn host_os_write_status_delete( .map_err(|err| HttpError::from(&err))?; Ok(HttpResponseUpdatedNoContent()) } + +/// Fetch basic information about this sled +#[endpoint { + method = GET, + path = "/inventory", +}] +async fn inventory( + request_context: RequestContext, +) -> Result, HttpError> { + let sa = request_context.context(); + Ok(HttpResponseOk(sa.inventory()?)) +} diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index a7d91e2b93..41fc84504e 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -10,6 +10,7 @@ pub use illumos_utils::opte::params::DhcpConfig; pub use illumos_utils::opte::params::VpcFirewallRule; pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; use illumos_utils::zpool::ZpoolName; +use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::{ DiskRuntimeState, InstanceProperties, InstanceRuntimeState, @@ -20,6 +21,7 @@ use omicron_common::api::internal::shared::{ }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_hardware::Baseboard; pub use sled_hardware::DendriteAsic; use sled_storage::dataset::DatasetKind; use sled_storage::dataset::DatasetName; @@ -805,16 +807,6 @@ pub struct TimeSync { pub correction: f64, } -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum SledRole { - /// The sled is a general compute sled. 
- Gimlet, - /// The sled is attached to the network switch, and has additional - /// responsibilities. - Scrimlet, -} - /// Parameters used to update the zone bundle cleanup context. #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] pub struct CleanupContextUpdate { @@ -825,3 +817,20 @@ pub struct CleanupContextUpdate { /// The new limit on the underlying dataset quota allowed for bundles. pub storage_limit: Option, } + +// Our SledRole and Baseboard types do not have to be identical to the Nexus +// ones, but they generally should be, and this avoids duplication. If it +// becomes easier to maintain a separate copy, we should do that. +pub type SledRole = nexus_client::types::SledRole; + +/// Identity and basic status information about this sled agent +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct Inventory { + pub sled_id: Uuid, + pub sled_agent_address: SocketAddrV6, + pub sled_role: SledRole, + pub baseboard: Baseboard, + pub usable_hardware_threads: u32, + pub usable_physical_ram: ByteCount, + pub reservoir_size: ByteCount, +} diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index 62012a7109..81e11dc1c2 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -5,13 +5,22 @@ //! Interfaces for working with sled agent configuration use crate::updates::ConfigUpdates; +use camino::Utf8Path; use dropshot::ConfigDropshot; -use dropshot::ConfigLogging; use serde::Deserialize; use serde::Serialize; +pub use sled_hardware::Baseboard; +use std::net::Ipv6Addr; use std::net::{IpAddr, SocketAddr}; use uuid::Uuid; +/// The reported amount of hardware threads for an emulated sled agent. +pub const TEST_HARDWARE_THREADS: u32 = 16; +/// The reported amount of physical RAM for an emulated sled agent. +pub const TEST_PHYSICAL_RAM: u64 = 32 * (1 << 30); +/// The reported amount of VMM reservoir RAM for an emulated sled agent. +pub const TEST_RESERVOIR_RAM: u64 = 16 * (1 << 30); + /// How a [`SledAgent`](`super::sled_agent::SledAgent`) simulates object states and /// transitions #[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)] @@ -47,6 +56,7 @@ pub struct ConfigHardware { pub hardware_threads: u32, pub physical_ram: u64, pub reservoir_ram: u64, + pub baseboard: Baseboard, } /// Configuration for a sled agent @@ -60,8 +70,6 @@ pub struct Config { pub nexus_address: SocketAddr, /// configuration for the sled agent dropshot server pub dropshot: ConfigDropshot, - /// configuration for the sled agent debug log - pub log: ConfigLogging, /// configuration for the sled agent's storage pub storage: ConfigStorage, /// configuration for the sled agent's updates @@ -69,3 +77,49 @@ pub struct Config { /// configuration to emulate the sled agent's hardware pub hardware: ConfigHardware, } + +impl Config { + pub fn for_testing( + id: Uuid, + sim_mode: SimMode, + nexus_address: Option, + update_directory: Option<&Utf8Path>, + ) -> Config { + // This IP range is guaranteed by RFC 6666 to discard traffic. + // For tests that don't use a Nexus, we use this address to simulate a + // non-functioning Nexus. + let nexus_address = + nexus_address.unwrap_or_else(|| "[100::1]:12345".parse().unwrap()); + // If the caller doesn't care to provide a directory in which to put + // updates, make up a path that doesn't exist. 
+ let update_directory = + update_directory.unwrap_or_else(|| "/nonexistent".into()); + Config { + id, + sim_mode, + nexus_address, + dropshot: ConfigDropshot { + bind_address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0), + request_body_max_bytes: 1024 * 1024, + ..Default::default() + }, + storage: ConfigStorage { + zpools: vec![], + ip: IpAddr::from(Ipv6Addr::LOCALHOST), + }, + updates: ConfigUpdates { + zone_artifact_path: update_directory.to_path_buf(), + }, + hardware: ConfigHardware { + hardware_threads: TEST_HARDWARE_THREADS, + physical_ram: TEST_PHYSICAL_RAM, + reservoir_ram: TEST_RESERVOIR_RAM, + baseboard: Baseboard::Gimlet { + identifier: format!("sim-{}", id), + model: String::from("sim-gimlet"), + revision: 3, + }, + }, + } + } +} diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index f77da11b0e..e5d7752511 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -10,7 +10,7 @@ use crate::bootstrap::early_networking::{ use crate::params::{ DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody, InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, - VpcFirewallRulesEnsureBody, + Inventory, OmicronZonesConfig, VpcFirewallRulesEnsureBody, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -56,6 +56,9 @@ pub fn api() -> SledApiDescription { api.register(uplink_ensure)?; api.register(read_network_bootstore_config)?; api.register(write_network_bootstore_config)?; + api.register(inventory)?; + api.register(omicron_zones_get)?; + api.register(omicron_zones_put)?; Ok(()) } @@ -384,3 +387,43 @@ async fn write_network_bootstore_config( ) -> Result { Ok(HttpResponseUpdatedNoContent()) } + +/// Fetch basic information about this sled +#[endpoint { + method = GET, + path = "/inventory", +}] +async fn inventory( + rqctx: RequestContext>, +) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk( + sa.inventory(rqctx.server.local_addr) + .map_err(|e| HttpError::for_internal_error(format!("{:#}", e)))?, + )) +} + +#[endpoint { + method = GET, + path = "/omicron-zones", +}] +async fn omicron_zones_get( + rqctx: RequestContext>, +) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await)) +} + +#[endpoint { + method = PUT, + path = "/omicron-zones", +}] +async fn omicron_zones_put( + rqctx: RequestContext>, + body: TypedBody, +) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); + sa.omicron_zones_ensure(body_args).await; + Ok(HttpResponseUpdatedNoContent()) +} diff --git a/sled-agent/src/sim/mod.rs b/sled-agent/src/sim/mod.rs index 8a730d5988..14d980cf79 100644 --- a/sled-agent/src/sim/mod.rs +++ b/sled-agent/src/sim/mod.rs @@ -17,6 +17,9 @@ mod sled_agent; mod storage; pub use crate::updates::ConfigUpdates; -pub use config::{Config, ConfigHardware, ConfigStorage, ConfigZpool, SimMode}; +pub use config::{ + Baseboard, Config, ConfigHardware, ConfigStorage, ConfigZpool, SimMode, + TEST_HARDWARE_THREADS, TEST_RESERVOIR_RAM, +}; pub use server::{run_standalone_server, RssArgs, Server}; pub use sled_agent::SledAgent; diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 1f2fe8e1d8..b214667631 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -50,6 +50,7 @@ impl Server { pub async fn start( config: &Config, log: &Logger, + wait_for_nexus: bool, ) -> Result { info!(log, "setting up sled agent server"); @@ -87,49 +88,61 @@ impl 
Server { // TODO-robustness if this returns a 400 error, we probably want to // return a permanent error from the `notify_nexus` closure. let sa_address = http_server.local_addr(); - let notify_nexus = || async { - debug!(log, "contacting server nexus"); - nexus_client - .sled_agent_put( - &config.id, - &NexusTypes::SledAgentStartupInfo { - sa_address: sa_address.to_string(), - role: NexusTypes::SledRole::Scrimlet, - baseboard: NexusTypes::Baseboard { - serial_number: format!( - "sim-{}", - &config.id.to_string()[0..8] - ), - part_number: String::from("Unknown"), - revision: 0, + let config_clone = config.clone(); + let log_clone = log.clone(); + let task = tokio::spawn(async move { + let config = config_clone; + let log = log_clone; + let nexus_client = nexus_client.clone(); + let notify_nexus = || async { + debug!(log, "contacting server nexus"); + nexus_client + .sled_agent_put( + &config.id, + &NexusTypes::SledAgentStartupInfo { + sa_address: sa_address.to_string(), + role: NexusTypes::SledRole::Scrimlet, + baseboard: NexusTypes::Baseboard { + serial_number: format!( + "sim-{}", + &config.id.to_string()[0..8] + ), + part_number: String::from("Unknown"), + revision: 0, + }, + usable_hardware_threads: config + .hardware + .hardware_threads, + usable_physical_ram: + NexusTypes::ByteCount::try_from( + config.hardware.physical_ram, + ) + .unwrap(), + reservoir_size: NexusTypes::ByteCount::try_from( + config.hardware.reservoir_ram, + ) + .unwrap(), }, - usable_hardware_threads: config - .hardware - .hardware_threads, - usable_physical_ram: NexusTypes::ByteCount::try_from( - config.hardware.physical_ram, - ) - .unwrap(), - reservoir_size: NexusTypes::ByteCount::try_from( - config.hardware.reservoir_ram, - ) - .unwrap(), - }, - ) - .await - .map_err(BackoffError::transient) - }; - let log_notification_failure = |error, delay| { - warn!(log, "failed to contact nexus, will retry in {:?}", delay; - "error" => ?error); - }; - retry_notify( - retry_policy_internal_service_aggressive(), - notify_nexus, - log_notification_failure, - ) - .await - .expect("Expected an infinite retry loop contacting Nexus"); + ) + .await + .map_err(BackoffError::transient) + }; + let log_notification_failure = |error, delay| { + warn!(log, "failed to contact nexus, will retry in {:?}", delay; + "error" => ?error); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + notify_nexus, + log_notification_failure, + ) + .await + .expect("Expected an infinite retry loop contacting Nexus"); + }); + + if wait_for_nexus { + task.await.unwrap(); + } let mut datasets = vec![]; // Create all the Zpools requested by the config, and allocate a single @@ -262,11 +275,11 @@ pub struct RssArgs { /// - Performs handoff to Nexus pub async fn run_standalone_server( config: &Config, + logging: &dropshot::ConfigLogging, rss_args: &RssArgs, ) -> Result<(), anyhow::Error> { let (drain, registration) = slog_dtrace::with_drain( - config - .log + logging .to_logger("sled-agent") .map_err(|message| anyhow!("initializing logger: {}", message))?, ); @@ -280,7 +293,7 @@ pub async fn run_standalone_server( } // Start the sled agent - let mut server = Server::start(config, &log).await?; + let mut server = Server::start(config, &log, true).await?; info!(log, "sled agent started successfully"); // Start the Internal DNS server diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index a16049dd2f..8a76bf6abc 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -10,41 +10,42 @@ 
use super::disk::SimDisk; use super::instance::SimInstance; use super::storage::CrucibleData; use super::storage::Storage; - use crate::nexus::NexusClient; use crate::params::{ DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, + InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, }; use crate::sim::simulatable::Simulatable; use crate::updates::UpdateManager; +use anyhow::bail; +use anyhow::Context; +use dropshot::HttpServer; use futures::lock::Mutex; -use omicron_common::api::external::{DiskState, Error, ResourceType}; +use illumos_utils::opte::params::{ + DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, +}; +use nexus_client::types::PhysicalDiskKind; +use omicron_common::address::PROPOLIS_PORT; +use omicron_common::api::external::{ + ByteCount, DiskState, Error, Generation, ResourceType, +}; use omicron_common::api::internal::nexus::{ DiskRuntimeState, SledInstanceState, }; use omicron_common::api::internal::nexus::{ InstanceRuntimeState, VmmRuntimeState, }; -use slog::Logger; -use std::net::{IpAddr, Ipv6Addr, SocketAddr}; -use std::sync::Arc; -use uuid::Uuid; - -use std::collections::HashMap; -use std::str::FromStr; - -use dropshot::HttpServer; -use illumos_utils::opte::params::{ - DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, -}; -use nexus_client::types::PhysicalDiskKind; -use omicron_common::address::PROPOLIS_PORT; use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; use propolis_mock_server::Context as PropolisContext; +use slog::Logger; +use std::collections::HashMap; +use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::str::FromStr; +use std::sync::Arc; +use uuid::Uuid; /// Simulates management of the control plane on a sled /// @@ -68,7 +69,8 @@ pub struct SledAgent { pub v2p_mappings: Mutex>>, mock_propolis: Mutex>, PropolisClient)>>, - + config: Config, + fake_zones: Mutex, instance_ensure_state_error: Mutex>, } @@ -161,6 +163,11 @@ impl SledAgent { disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), + config: config.clone(), + fake_zones: Mutex::new(OmicronZonesConfig { + generation: Generation::new(), + zones: vec![], + }), instance_ensure_state_error: Mutex::new(None), }) } @@ -665,4 +672,39 @@ impl SledAgent { *mock_lock = Some((srv, client)); Ok(()) } + + pub fn inventory(&self, addr: SocketAddr) -> anyhow::Result { + let sled_agent_address = match addr { + SocketAddr::V4(_) => { + bail!("sled_agent_ip must be v6 for inventory") + } + SocketAddr::V6(v6) => v6, + }; + Ok(Inventory { + sled_id: self.id, + sled_agent_address, + sled_role: SledRole::Gimlet, + baseboard: self.config.hardware.baseboard.clone(), + usable_hardware_threads: self.config.hardware.hardware_threads, + usable_physical_ram: ByteCount::try_from( + self.config.hardware.physical_ram, + ) + .context("usable_physical_ram")?, + reservoir_size: ByteCount::try_from( + self.config.hardware.reservoir_ram, + ) + .context("reservoir_size")?, + }) + } + + pub async fn omicron_zones_list(&self) -> OmicronZonesConfig { + self.fake_zones.lock().await.clone() + } + + pub async fn omicron_zones_ensure( + &self, + requested_zones: OmicronZonesConfig, + ) { + *self.fake_zones.lock().await = requested_zones; + } } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 621d003268..5bc0f8d257 100644 --- a/sled-agent/src/sled_agent.rs +++ 
b/sled-agent/src/sled_agent.rs @@ -18,8 +18,8 @@ use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, OmicronZonesConfig, SledRole, TimeSync, - VpcFirewallRule, ZoneBundleMetadata, Zpool, + InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, + TimeSync, VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_monitor::UnderlayAccess; @@ -42,7 +42,7 @@ use illumos_utils::zone::ZONE_PREFIX; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, SLED_PREFIX, }; -use omicron_common::api::external::Vni; +use omicron_common::api::external::{ByteCount, ByteCountRangeError, Vni}; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::nexus::{ @@ -214,6 +214,35 @@ impl From for dropshot::HttpError { } } +/// Error returned by `SledAgent::inventory()` +#[derive(thiserror::Error, Debug)] +pub enum InventoryError { + // This error should be impossible because ByteCount supports values from + // [0, i64::MAX] and we don't have anything with that many bytes in the + // system. + #[error(transparent)] + BadByteCount(#[from] ByteCountRangeError), +} + +impl From for omicron_common::api::external::Error { + fn from(inventory_error: InventoryError) -> Self { + match inventory_error { + e @ InventoryError::BadByteCount(..) => { + omicron_common::api::external::Error::internal_error(&format!( + "{:#}", + e + )) + } + } + } +} + +impl From for dropshot::HttpError { + fn from(error: InventoryError) -> Self { + Self::from(omicron_common::api::external::Error::from(error)) + } +} + /// Describes an executing Sled Agent object. /// /// Contains both a connection to the Nexus, as well as managed instances. @@ -1056,6 +1085,37 @@ impl SledAgent { pub(crate) fn boot_disk_os_writer(&self) -> &BootDiskOsWriter { &self.inner.boot_disk_os_writer } + + /// Return basic information about ourselves: identity and status + /// + /// This is basically a GET version of the information we push to Nexus on + /// startup. 
+ pub(crate) fn inventory(&self) -> Result { + let sled_id = self.inner.id; + let sled_agent_address = self.inner.sled_address(); + let is_scrimlet = self.inner.hardware.is_scrimlet(); + let baseboard = self.inner.hardware.baseboard(); + let usable_hardware_threads = + self.inner.hardware.online_processor_count(); + let usable_physical_ram = + self.inner.hardware.usable_physical_ram_bytes(); + let reservoir_size = self.inner.instances.reservoir_size(); + let sled_role = if is_scrimlet { + crate::params::SledRole::Scrimlet + } else { + crate::params::SledRole::Gimlet + }; + + Ok(Inventory { + sled_id, + sled_agent_address, + sled_role, + baseboard, + usable_hardware_threads, + usable_physical_ram: ByteCount::try_from(usable_physical_ram)?, + reservoir_size, + }) + } } async fn register_metric_producer_with_nexus( From f185426f9cca3cda6db57a50a64c0dba41ce91e6 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 4 Jan 2024 16:58:21 -0800 Subject: [PATCH 141/186] sled agent client could use some primitives from omicron_common (#4754) --- clients/sled-agent-client/src/lib.rs | 56 +++---------------- dev-tools/omdb/src/bin/omdb/db.rs | 2 +- nexus/db-model/src/bytecount.rs | 6 -- nexus/db-model/src/generation.rs | 6 -- nexus/db-model/src/inventory.rs | 12 ++-- .../src/db/datastore/network_interface.rs | 2 +- nexus/inventory/src/builder.rs | 5 +- nexus/inventory/src/collector.rs | 3 +- nexus/inventory/src/examples.rs | 7 +-- sled-agent/src/params.rs | 2 +- 10 files changed, 23 insertions(+), 78 deletions(-) diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 89f41d10a6..ee2214c3c2 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -25,6 +25,9 @@ progenitor::generate_api!( //TODO trade the manual transformations later in this file for the // replace directives below? 
replace = { + ByteCount = omicron_common::api::external::ByteCount, + Generation = omicron_common::api::external::Generation, + Name = omicron_common::api::external::Name, SwitchLocation = omicron_common::api::external::SwitchLocation, Ipv6Network = ipnetwork::Ipv6Network, IpNetwork = ipnetwork::IpNetwork, @@ -80,7 +83,7 @@ impl From propolis_id: s.propolis_id, dst_propolis_id: s.dst_propolis_id, migration_id: s.migration_id, - gen: s.gen.into(), + gen: s.gen, time_updated: s.time_updated, } } @@ -114,18 +117,6 @@ impl From } } -impl From for types::ByteCount { - fn from(s: omicron_common::api::external::ByteCount) -> Self { - Self(s.to_bytes()) - } -} - -impl From for types::Generation { - fn from(s: omicron_common::api::external::Generation) -> Self { - Self(i64::from(&s) as u64) - } -} - impl From for omicron_common::api::internal::nexus::InstanceRuntimeState { @@ -134,7 +125,7 @@ impl From propolis_id: s.propolis_id, dst_propolis_id: s.dst_propolis_id, migration_id: s.migration_id, - gen: s.gen.into(), + gen: s.gen, time_updated: s.time_updated, } } @@ -144,11 +135,7 @@ impl From for omicron_common::api::internal::nexus::VmmRuntimeState { fn from(s: types::VmmRuntimeState) -> Self { - Self { - state: s.state.into(), - gen: s.gen.into(), - time_updated: s.time_updated, - } + Self { state: s.state.into(), gen: s.gen, time_updated: s.time_updated } } } @@ -192,25 +179,13 @@ impl From } } -impl From for omicron_common::api::external::ByteCount { - fn from(s: types::ByteCount) -> Self { - Self::try_from(s.0).unwrap_or_else(|e| panic!("{}: {}", s.0, e)) - } -} - -impl From for omicron_common::api::external::Generation { - fn from(s: types::Generation) -> Self { - Self::try_from(s.0 as i64).unwrap() - } -} - impl From for types::DiskRuntimeState { fn from(s: omicron_common::api::internal::nexus::DiskRuntimeState) -> Self { Self { disk_state: s.disk_state.into(), - gen: s.gen.into(), + gen: s.gen, time_updated: s.time_updated, } } @@ -242,7 +217,7 @@ impl From fn from(s: types::DiskRuntimeState) -> Self { Self { disk_state: s.disk_state.into(), - gen: s.gen.into(), + gen: s.gen, time_updated: s.time_updated, } } @@ -268,19 +243,6 @@ impl From for omicron_common::api::external::DiskState { } } -impl From<&omicron_common::api::external::Name> for types::Name { - fn from(s: &omicron_common::api::external::Name) -> Self { - Self::try_from(<&str>::from(s)) - .unwrap_or_else(|e| panic!("{}: {}", s, e)) - } -} - -impl From for omicron_common::api::external::Name { - fn from(s: types::Name) -> Self { - Self::try_from(s.as_str().to_owned()).unwrap() - } -} - impl From for types::Vni { fn from(v: omicron_common::api::external::Vni) -> Self { Self(u32::from(v)) @@ -541,7 +503,7 @@ impl From Self { id: s.id, kind: s.kind.into(), - name: (&s.name).into(), + name: s.name, ip: s.ip, mac: s.mac.into(), subnet: s.subnet.into(), diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index ad7ab35455..f58fd57b9d 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2738,7 +2738,7 @@ fn inv_collection_print_sleds(collection: &Collection) { ); println!( " zones generation: {} (count: {})", - *zones.zones.generation, + zones.zones.generation, zones.zones.zones.len() ); diff --git a/nexus/db-model/src/bytecount.rs b/nexus/db-model/src/bytecount.rs index 9ea13956b7..92a01db43f 100644 --- a/nexus/db-model/src/bytecount.rs +++ b/nexus/db-model/src/bytecount.rs @@ -53,12 +53,6 @@ where } } -impl From for sled_agent_client::types::ByteCount { - fn from(b: 
ByteCount) -> Self { - Self(b.to_bytes()) - } -} - impl From for ByteCount { fn from(bs: BlockSize) -> Self { Self(bs.to_bytes().into()) diff --git a/nexus/db-model/src/generation.rs b/nexus/db-model/src/generation.rs index b7e3a2b954..751cb98f3c 100644 --- a/nexus/db-model/src/generation.rs +++ b/nexus/db-model/src/generation.rs @@ -60,9 +60,3 @@ where .map_err(|e| e.into()) } } - -impl From for sled_agent_client::types::Generation { - fn from(g: Generation) -> Self { - Self(i64::from(&g.0) as u64) - } -} diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 47e2033718..4e3e5fad56 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -675,7 +675,7 @@ impl InvSledOmicronZones { time_collected: zones_found.time_collected, source: zones_found.source.clone(), sled_id: zones_found.sled_id, - generation: Generation(zones_found.zones.generation.clone().into()), + generation: Generation(zones_found.zones.generation), } } @@ -687,7 +687,7 @@ impl InvSledOmicronZones { source: self.source, sled_id: self.sled_id, zones: nexus_types::inventory::OmicronZonesConfig { - generation: self.generation.0.into(), + generation: *self.generation, zones: Vec::new(), }, } @@ -1123,11 +1123,7 @@ impl InvOmicronZoneNic { Ok(Some(InvOmicronZoneNic { inv_collection_id, id: nic.id, - name: Name::from( - omicron_common::api::external::Name::from( - nic.name.clone(), - ), - ), + name: Name::from(nic.name.clone()), ip: IpNetwork::from(nic.ip), mac: MacAddr::from( omicron_common::api::external::MacAddr::from( @@ -1155,7 +1151,7 @@ impl InvOmicronZoneNic { zone_id, ), mac: (*self.mac).into(), - name: (&(*self.name)).into(), + name: self.name.into(), primary: self.is_primary, slot: *self.slot, vni: nexus_types::inventory::Vni::from(*self.vni), diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 4d4e43c9a7..be12ea5231 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -76,7 +76,7 @@ impl From for sled_client_types::NetworkInterface { sled_client_types::NetworkInterface { id: nic.id, kind, - name: sled_client_types::Name::from(&nic.name.0), + name: nic.name.into(), ip: nic.ip.ip(), mac: sled_client_types::MacAddr::from(nic.mac.0), subnet: sled_client_types::IpNet::from(ip_subnet), diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 602655ef0b..62d338c1ee 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -26,7 +26,6 @@ use nexus_types::inventory::RotPageWhich; use nexus_types::inventory::RotState; use nexus_types::inventory::ServiceProcessor; use nexus_types::inventory::SledAgent; -use omicron_common::api::external::ByteCount; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::sync::Arc; @@ -455,8 +454,8 @@ impl CollectionBuilder { sled_role: inventory.sled_role, baseboard_id, usable_hardware_threads: inventory.usable_hardware_threads, - usable_physical_ram: ByteCount::from(inventory.usable_physical_ram), - reservoir_size: ByteCount::from(inventory.reservoir_size), + usable_physical_ram: inventory.usable_physical_ram, + reservoir_size: inventory.reservoir_size, time_collected: now(), sled_id, }; diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 9b335d3ee4..ab9af3f9e0 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -374,6 +374,7 @@ mod test { use 
crate::StaticSledAgentEnumerator; use gateway_messages::SpPort; use nexus_types::inventory::Collection; + use omicron_common::api::external::Generation; use omicron_sled_agent::sim; use std::fmt::Write; use std::net::Ipv6Addr; @@ -540,7 +541,7 @@ mod test { let zone_address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0); client .omicron_zones_put(&sled_agent_client::types::OmicronZonesConfig { - generation: sled_agent_client::types::Generation::from(3), + generation: Generation::from(3), zones: vec![sled_agent_client::types::OmicronZoneConfig { id: zone_id, underlay_address: *zone_address.ip(), diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 054be457f3..93ba139c85 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -16,6 +16,7 @@ use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::OmicronZonesConfig; use nexus_types::inventory::RotPage; use nexus_types::inventory::RotPageWhich; +use omicron_common::api::external::ByteCount; use std::sync::Arc; use strum::IntoEnumIterator; use uuid::Uuid; @@ -441,13 +442,11 @@ pub fn sled_agent( ) -> sled_agent_client::types::Inventory { sled_agent_client::types::Inventory { baseboard, - reservoir_size: sled_agent_client::types::ByteCount::from(1024), + reservoir_size: ByteCount::from(1024), sled_role, sled_agent_address: "[::1]:56792".parse().unwrap(), sled_id, usable_hardware_threads: 10, - usable_physical_ram: sled_agent_client::types::ByteCount::from( - 1024 * 1024, - ), + usable_physical_ram: ByteCount::from(1024 * 1024), } } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 41fc84504e..8417546e3b 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -298,7 +298,7 @@ pub struct OmicronZonesConfig { impl From for sled_agent_client::types::OmicronZonesConfig { fn from(local: OmicronZonesConfig) -> Self { Self { - generation: local.generation.into(), + generation: local.generation, zones: local.zones.into_iter().map(|s| s.into()).collect(), } } From f6adabf063ca27cda145df9320e4e62531b05ebb Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Jan 2024 05:26:42 +0000 Subject: [PATCH 142/186] Update taiki-e/install-action digest to 115b656 (#4756) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`56ab793` -> `115b656`](https://togithub.com/taiki-e/install-action/compare/56ab793...115b656) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 4d9812a44e..0627ea1563 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@56ab7930c591507f833cbaed864d201386d518a8 # v2 + uses: taiki-e/install-action@115b656342518960cf3dbf5c01f62b684985ca11 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From b17eed8ff67f02c92ed10193a0ca91582f75936d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Jan 2024 00:33:32 -0800 Subject: [PATCH 143/186] Update Rust crate prettyplease to 0.2.16 (#4759) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3cc7d09d82..c5d0ce5d34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6156,9 +6156,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", "syn 2.0.46", diff --git a/Cargo.toml b/Cargo.toml index a9387fac6d..55ab6c0675 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -291,7 +291,7 @@ postgres-protocol = "0.6.6" predicates = "3.0.4" pretty_assertions = "1.4.0" pretty-hex = "0.4.0" -prettyplease = "0.2.15" +prettyplease = "0.2.16" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } From 69733d83fc56dcd46753ae5dd4c3a101af327892 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Jan 2024 00:34:07 -0800 Subject: [PATCH 144/186] Update Rust crate libsw to 3.3.1 (#4757) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c5d0ce5d34..3e8ad7495b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3721,9 +3721,9 @@ dependencies = [ [[package]] name = "libsw" -version = "3.3.0" +version = "3.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "610cd929d24f634af855498b575263c44d541a0e28c21d595968a6e25fe190f9" +checksum = "0673364c1ef7a1674241dbad9ba2415354103d6126451f01eeb7aaa25d6b4fce" dependencies = [ "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 55ab6c0675..bc5ba0bc45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -359,7 +359,7 @@ strum = { version = "0.25", features = [ "derive" ] } subprocess = "0.2.9" supports-color = "2.1.0" swrite = "0.1.0" -libsw = { version = "3.3.0", features = ["tokio"] } +libsw = { version = "3.3.1", features = ["tokio"] } syn = { version = "2.0" } tabled = "0.14" tar = "0.4" From bf4983327139208bac07c221077d3be7d9cee769 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 5 Jan 2024 13:13:04 -0500 Subject: [PATCH 145/186] IP pools data model and API rework (#4261) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #2148 Closes #4002 Closes #4003 Closes #4006 ## 
Background

#3985 (and followups #3998 and #4007) made it possible to associate an IP pool with a silo so that instances created in that silo would get their ephemeral IPs from said pool by default (i.e., without the user having to say anything other than "I want an ephemeral IP"). An IP pool associated with a silo was not accessible for ephemeral IP allocation from other silos — if a disallowed pool was specified by name at instance create time, the request would 404.

However! That was the quick version, and the data model left much to be desired. The relation was modeled by adding a nullable `silo_id` and sort-of-not-really-nullable `is_default` column directly on the IP pool table, which has the following limitations (and there are probably more):

* A given IP pool could be associated with at most one silo and could not be shared
* The concept of `default` was treated as a property of the pool itself, rather than a property of the _association_ with another resource, which is quite strange. Even if you could associate the pool with multiple silos, you could not have it be the default for one and not for the other
* There was no way to create an IP pool without associating it with either the fleet or a silo
* Extending this model to allow association at the project level would be inelegant — we'd have to add a `project_id` column (which I did in #3981 before removing it in #3985)

More broadly (and vaguely), the idea of an IP pool "knowing" about silos or projects doesn't really make sense. Entities aren't really supposed to know about each other unless they have a parent-child relationship.

## Changes in this PR

### No such thing as fleet-scoped pool, only silo

Thanks to @zephraph for encouraging me to make this change. It is dramatically easier to explain "link silo to IP pool" than it is to explain "link resource (fleet or silo) to IP pool". The way to recreate the behavior of a single default pool for the fleet is to simply associate a pool with all silos. Data migrations ensure that existing fleet-scoped pools will be associated with all silos. There can only be one default pool for a silo, so in the rare case where pool A is a fleet default and pool B is the default on silo S, we associate both A and B with S, but only B is made the silo's default pool.

### API

These endpoints are added. They're pretty self-explanatory.

```
ip_pool_silo_link     POST   /v1/system/ip-pools/{pool}/silos
ip_pool_silo_list     GET    /v1/system/ip-pools/{pool}/silos
ip_pool_silo_unlink   DELETE /v1/system/ip-pools/{pool}/silos/{silo}
ip_pool_silo_update   PUT    /v1/system/ip-pools/{pool}/silos/{silo}
```

The `silo_id` and `is_default` fields are removed from the `IpPool` response as they are now a property of the `IpPoolSilo` link, not the pool itself. I also fixed the silo-scoped IP pools list (`/v1/ip-pools`) and fetch (`/v1/ip-pools/{pool}`) endpoints, which a) did not actually filter for the current silo, allowing any user to fetch any pool, and b) took a spurious `project` query param that didn't do anything.

### DB

The association between IP pools and fleet or silo (or eventually projects, but not here) is now modeled through a polymorphic join table called `ip_pool_resource`:

ip_pool_id | resource_type | resource_id | is_default
-- | -- | -- | --
123 | silo | 23 | true
123 | silo | 4 | false
~~65~~ | ~~fleet~~ | ~~FLEET_ID~~ | ~~true~~

Now, instead of setting the association with a silo or fleet at IP pool create or update time, there are separate endpoints for adding and removing an association. A pool can be associated with any number of resources, but a unique index ensures that a given resource can only have one default pool (see the sketch below).
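To make that concrete, here is a minimal sketch of the join table and the default-uniqueness guarantee. The columns match the Diesel `ip_pool_resource` definition in this diff; the index name and the partial-index formulation are illustrative, since the actual DDL lives in the `schema/crdb/23.0.0/` migrations:

```
-- sketch only: see schema/crdb/23.0.0/ for the actual migrations
CREATE TABLE IF NOT EXISTS omicron.public.ip_pool_resource (
    ip_pool_id UUID NOT NULL,
    resource_type omicron.public.ip_pool_resource_type NOT NULL,
    resource_id UUID NOT NULL,
    is_default BOOLEAN NOT NULL,

    PRIMARY KEY (ip_pool_id, resource_type, resource_id)
);

-- at most one default pool per resource (assumed formulation)
CREATE UNIQUE INDEX IF NOT EXISTS one_default_ip_pool_per_resource
    ON omicron.public.ip_pool_resource (resource_id)
    WHERE is_default = TRUE;
```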
### Default IP pool logic

If an instance ephemeral IP or a floating IP is created **with a pool specified**, we simply use that pool if it exists and is linked to the user's silo.

If an instance ephemeral IP or a floating IP is created **without a pool specified**, we look for a default pool for the current silo. If there is a pool linked with the current silo with `is_default=true`, use that. Otherwise, there is no default pool for the given scope and IP allocation will fail, which means the instance create or floating IP create request will fail. The difference introduced in this PR is that we do not fall back to the fleet default if there is no silo default, because we have removed the concept of a fleet-scoped pool.

### Tests and test helpers

This is the source of a lot of noise in this PR. Because there can no longer be a fleet default pool, we can no longer rely on that for tests. The test setup was really confusing: we assumed a default IP pool existed, but we still had to populate it (add a range) if we had to do anything with it. Now we don't assume it exists; we create it, add a range, and associate it with a silo all in one helper.

## What do customers have to do when they upgrade?

They should not _have_ to do anything at upgrade time. If they were relying on a single fleet default pool to automatically be used by new silos, they will have to manually associate each new silo they create with the desired pool. We are working on ways to make that easier or more automatic, but that's not in this change. It is less urgent because silo creation is an infrequent operation.

If they are _not_ using the previous fleet-wide default IP pool named `default` and do not want it to exist, they can simply delete any IP ranges it contains, unlink it from all silos, and delete it. If they are not using it, there should not be any IPs allocated from it (which means they can delete it).
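For illustration, here is roughly what linking a pool to a silo looks like with the new endpoints, mirroring the `end-to-end-tests` bootstrap changes further down in this diff. The silo name is a placeholder, error handling is elided, and the relevant `ClientSystem*` extension traits from `oxide_client` are assumed to be in scope:

```
use oxide_client::types::{IpPoolCreate, IpPoolSiloLink, NameOrId};

// Sketch, assuming `client` is an authenticated oxide_client::Client.
async fn make_default_pool(
    client: &oxide_client::Client,
    silo_name: &str,
) -> anyhow::Result<()> {
    // Create the pool; it is no longer tied to a silo (or the fleet) at
    // creation time.
    client
        .ip_pool_create()
        .body(IpPoolCreate {
            name: "default".parse().unwrap(),
            description: "Default IP pool".to_string(),
        })
        .send()
        .await?;
    // POST /v1/system/ip-pools/{pool}/silos: link the pool to the silo
    // and mark it as that silo's default.
    client
        .ip_pool_silo_link()
        .pool("default")
        .body(IpPoolSiloLink {
            silo: NameOrId::Name(silo_name.parse().unwrap()),
            is_default: true,
        })
        .send()
        .await?;
    Ok(())
}
```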
--------- Co-authored-by: Justin Bennett --- common/src/api/external/mod.rs | 1 + end-to-end-tests/src/bin/bootstrap.rs | 23 +- end-to-end-tests/src/helpers/ctx.rs | 4 + nexus/db-model/src/ip_pool.rs | 56 +- nexus/db-model/src/schema.rs | 19 +- .../src/db/datastore/external_ip.rs | 65 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 751 ++++++++++++++++-- nexus/db-queries/src/db/datastore/project.rs | 30 - nexus/db-queries/src/db/datastore/rack.rs | 55 +- nexus/db-queries/src/db/pool_connection.rs | 1 + .../db-queries/src/db/queries/external_ip.rs | 58 +- nexus/src/app/ip_pool.rs | 193 ++++- nexus/src/app/project.rs | 38 - nexus/src/app/sagas/disk_create.rs | 20 +- nexus/src/app/sagas/disk_delete.rs | 13 +- nexus/src/app/sagas/instance_create.rs | 6 +- nexus/src/app/sagas/instance_delete.rs | 4 +- nexus/src/app/sagas/instance_migrate.rs | 8 +- nexus/src/app/sagas/instance_start.rs | 4 +- nexus/src/app/sagas/snapshot_create.rs | 50 +- nexus/src/app/sagas/vpc_create.rs | 4 +- nexus/src/external_api/http_entrypoints.rs | 186 ++++- nexus/test-utils/src/resource_helpers.rs | 164 +++- nexus/tests/integration_tests/disks.rs | 52 +- nexus/tests/integration_tests/endpoints.rs | 37 +- nexus/tests/integration_tests/external_ips.rs | 112 ++- nexus/tests/integration_tests/instances.rs | 405 +++++++--- nexus/tests/integration_tests/ip_pools.rs | 730 ++++++++++------- nexus/tests/integration_tests/metrics.rs | 6 +- nexus/tests/integration_tests/pantry.rs | 36 +- nexus/tests/integration_tests/projects.rs | 6 +- nexus/tests/integration_tests/quotas.rs | 12 +- nexus/tests/integration_tests/sleds.rs | 4 +- nexus/tests/integration_tests/snapshots.rs | 31 +- .../integration_tests/subnet_allocation.rs | 4 +- nexus/tests/integration_tests/unauthorized.rs | 22 +- nexus/tests/integration_tests/utilization.rs | 4 +- .../integration_tests/volume_management.rs | 32 +- nexus/tests/integration_tests/vpc_subnets.rs | 4 +- nexus/tests/output/nexus_tags.txt | 4 + nexus/types/src/external_api/params.rs | 36 +- nexus/types/src/external_api/views.rs | 14 +- openapi/nexus.json | 326 +++++++- schema/crdb/23.0.0/up1.sql | 3 + schema/crdb/23.0.0/up2.sql | 8 + schema/crdb/23.0.0/up3.sql | 5 + schema/crdb/23.0.0/up4.sql | 38 + schema/crdb/23.0.0/up5.sql | 13 + schema/crdb/23.0.1/README.md | 1 + schema/crdb/23.0.1/up1.sql | 1 + schema/crdb/23.0.1/up2.sql | 3 + schema/crdb/dbinit.sql | 51 +- 52 files changed, 2633 insertions(+), 1120 deletions(-) create mode 100644 schema/crdb/23.0.0/up1.sql create mode 100644 schema/crdb/23.0.0/up2.sql create mode 100644 schema/crdb/23.0.0/up3.sql create mode 100644 schema/crdb/23.0.0/up4.sql create mode 100644 schema/crdb/23.0.0/up5.sql create mode 100644 schema/crdb/23.0.1/README.md create mode 100644 schema/crdb/23.0.1/up1.sql create mode 100644 schema/crdb/23.0.1/up2.sql diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 3b05c58df3..312d400d2f 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -739,6 +739,7 @@ pub enum ResourceType { LoopbackAddress, SwitchPortSettings, IpPool, + IpPoolResource, InstanceNetworkInterface, PhysicalDisk, Rack, diff --git a/end-to-end-tests/src/bin/bootstrap.rs b/end-to-end-tests/src/bin/bootstrap.rs index 9ddd872bc2..21e59647ae 100644 --- a/end-to-end-tests/src/bin/bootstrap.rs +++ b/end-to-end-tests/src/bin/bootstrap.rs @@ -4,7 +4,8 @@ use end_to_end_tests::helpers::{generate_name, get_system_ip_pool}; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use 
oxide_client::types::{ ByteCount, DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify, - DiskCreate, DiskSource, IpRange, Ipv4Range, SiloQuotasUpdate, + DiskCreate, DiskSource, IpPoolCreate, IpPoolSiloLink, IpRange, Ipv4Range, + NameOrId, SiloQuotasUpdate, }; use oxide_client::{ ClientDisksExt, ClientHiddenExt, ClientProjectsExt, @@ -38,9 +39,27 @@ async fn main() -> Result<()> { // ===== CREATE IP POOL ===== // eprintln!("creating IP pool... {:?} - {:?}", first, last); + let pool_name = "default"; + client + .ip_pool_create() + .body(IpPoolCreate { + name: pool_name.parse().unwrap(), + description: "Default IP pool".to_string(), + }) + .send() + .await?; + client + .ip_pool_silo_link() + .pool(pool_name) + .body(IpPoolSiloLink { + silo: NameOrId::Name(params.silo_name().parse().unwrap()), + is_default: true, + }) + .send() + .await?; client .ip_pool_range_add() - .pool("default") + .pool(pool_name) .body(IpRange::V4(Ipv4Range { first, last })) .send() .await?; diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index 0132feafeb..e4bf61356c 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -287,6 +287,10 @@ impl ClientParams { .build()?; Ok(Client::new_with_client(&base_url, reqwest_client)) } + + pub fn silo_name(&self) -> String { + self.rss_config.recovery_silo.silo_name.to_string() + } } async fn wait_for_records( diff --git a/nexus/db-model/src/ip_pool.rs b/nexus/db-model/src/ip_pool.rs index 8ad78af07b..bec1113151 100644 --- a/nexus/db-model/src/ip_pool.rs +++ b/nexus/db-model/src/ip_pool.rs @@ -5,8 +5,10 @@ //! Model types for IP Pools and the CIDR blocks therein. use crate::collection::DatastoreCollectionConfig; +use crate::impl_enum_type; use crate::schema::ip_pool; use crate::schema::ip_pool_range; +use crate::schema::ip_pool_resource; use crate::Name; use chrono::DateTime; use chrono::Utc; @@ -35,42 +37,23 @@ pub struct IpPool { /// Child resource generation number, for optimistic concurrency control of /// the contained ranges. pub rcgen: i64, - - /// Silo, if IP pool is associated with a particular silo. One special use - /// for this is associating a pool with the internal silo oxide-internal, - /// which is used for internal services. If there is no silo ID, the - /// pool is considered a fleet-wide pool and will be used for allocating - /// instance IPs in silos that don't have their own pool. 
- pub silo_id: Option, - - pub is_default: bool, } impl IpPool { - pub fn new( - pool_identity: &external::IdentityMetadataCreateParams, - silo_id: Option, - is_default: bool, - ) -> Self { + pub fn new(pool_identity: &external::IdentityMetadataCreateParams) -> Self { Self { identity: IpPoolIdentity::new( Uuid::new_v4(), pool_identity.clone(), ), rcgen: 0, - silo_id, - is_default, } } } impl From for views::IpPool { fn from(pool: IpPool) -> Self { - Self { - identity: pool.identity(), - silo_id: pool.silo_id, - is_default: pool.is_default, - } + Self { identity: pool.identity() } } } @@ -93,6 +76,37 @@ impl From for IpPoolUpdate { } } +impl_enum_type!( + #[derive(SqlType, Debug, Clone, Copy, QueryId)] + #[diesel(postgres_type(name = "ip_pool_resource_type"))] + pub struct IpPoolResourceTypeEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] + #[diesel(sql_type = IpPoolResourceTypeEnum)] + pub enum IpPoolResourceType; + + Silo => b"silo" +); + +#[derive(Queryable, Insertable, Selectable, Clone, Debug)] +#[diesel(table_name = ip_pool_resource)] +pub struct IpPoolResource { + pub ip_pool_id: Uuid, + pub resource_type: IpPoolResourceType, + pub resource_id: Uuid, + pub is_default: bool, +} + +impl From for views::IpPoolSilo { + fn from(assoc: IpPoolResource) -> Self { + Self { + ip_pool_id: assoc.ip_pool_id, + silo_id: assoc.resource_id, + is_default: assoc.is_default, + } + } +} + /// A range of IP addresses for an IP Pool. #[derive(Queryable, Insertable, Selectable, Clone, Debug)] #[diesel(table_name = ip_pool_range)] diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 791afa6de4..02bdd2c349 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(22, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(23, 0, 1); table! { disk (id) { @@ -504,7 +504,14 @@ table! { time_modified -> Timestamptz, time_deleted -> Nullable, rcgen -> Int8, - silo_id -> Nullable, + } +} + +table! 
{ + ip_pool_resource (ip_pool_id, resource_type, resource_id) { + ip_pool_id -> Uuid, + resource_type -> crate::IpPoolResourceTypeEnum, + resource_id -> Uuid, is_default -> Bool, } } @@ -1426,8 +1433,9 @@ allow_tables_to_appear_in_same_query!( ); joinable!(system_update_component_update -> component_update (component_update_id)); -allow_tables_to_appear_in_same_query!(ip_pool_range, ip_pool); +allow_tables_to_appear_in_same_query!(ip_pool_range, ip_pool, ip_pool_resource); joinable!(ip_pool_range -> ip_pool (ip_pool_id)); +joinable!(ip_pool_resource -> ip_pool (ip_pool_id)); allow_tables_to_appear_in_same_query!(inv_collection, inv_collection_error); joinable!(inv_collection_error -> inv_collection (inv_collection_id)); @@ -1478,6 +1486,11 @@ allow_tables_to_appear_in_same_query!( allow_tables_to_appear_in_same_query!(dns_zone, dns_version, dns_name); allow_tables_to_appear_in_same_query!(external_ip, service); +// used for query to check whether an IP pool association has any allocated IPs before deleting +allow_tables_to_appear_in_same_query!(external_ip, instance); +allow_tables_to_appear_in_same_query!(external_ip, project); +allow_tables_to_appear_in_same_query!(external_ip, ip_pool_resource); + allow_tables_to_appear_in_same_query!( switch_port, switch_port_settings_route_config diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 2adeebd819..02ce950118 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -76,22 +76,18 @@ impl DataStore { .fetch_for(authz::Action::CreateChild) .await?; - // If the named pool conflicts with user's current scope, i.e., - // if it has a silo and it's different from the current silo, - // then as far as IP allocation is concerned, that pool doesn't - // exist. If the pool has no silo, it's fleet-scoped and can - // always be used. - let authz_silo_id = opctx.authn.silo_required()?.id(); - if let Some(pool_silo_id) = pool.silo_id { - if pool_silo_id != authz_silo_id { - return Err(authz_pool.not_found()); - } + // If this pool is not linked to the current silo, 404 + if self.ip_pool_fetch_link(opctx, pool.id()).await.is_err() { + return Err(authz_pool.not_found()); } pool } // If no name given, use the default logic - None => self.ip_pools_fetch_default(&opctx).await?, + None => { + let (.., pool) = self.ip_pools_fetch_default(&opctx).await?; + pool + } }; let pool_id = pool.identity.id; @@ -147,36 +143,29 @@ impl DataStore { ) -> CreateResult { let ip_id = Uuid::new_v4(); - // See `allocate_instance_ephemeral_ip`: we're replicating - // its strucutre to prevent cross-silo pool access. - let pool_id = if let Some(name_or_id) = params.pool { - let (.., authz_pool, pool) = match name_or_id { - NameOrId::Name(name) => { - LookupPath::new(opctx, self) - .ip_pool_name(&Name(name)) - .fetch_for(authz::Action::CreateChild) - .await? - } - NameOrId::Id(id) => { - LookupPath::new(opctx, self) - .ip_pool_id(id) - .fetch_for(authz::Action::CreateChild) - .await? - } - }; - - let authz_silo_id = opctx.authn.silo_required()?.id(); - if let Some(pool_silo_id) = pool.silo_id { - if pool_silo_id != authz_silo_id { - return Err(authz_pool.not_found()); - } + // TODO: NameOrId resolution should happen a level higher, in the nexus function + let (.., authz_pool, pool) = match params.pool { + Some(NameOrId::Name(name)) => { + LookupPath::new(opctx, self) + .ip_pool_name(&Name(name)) + .fetch_for(authz::Action::Read) + .await? 
+ } + Some(NameOrId::Id(id)) => { + LookupPath::new(opctx, self) + .ip_pool_id(id) + .fetch_for(authz::Action::Read) + .await? } + None => self.ip_pools_fetch_default(opctx).await?, + }; - pool - } else { - self.ip_pools_fetch_default(opctx).await? + let pool_id = pool.id(); + + // If this pool is not linked to the current silo, 404 + if self.ip_pool_fetch_link(opctx, pool_id).await.is_err() { + return Err(authz_pool.not_found()); } - .id(); let data = if let Some(ip) = params.address { IncompleteExternalIp::for_floating_explicit( diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index 4497e3f2b4..f51f54d592 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -11,19 +11,27 @@ use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; +use crate::db::error::public_error_from_diesel_lookup; use crate::db::error::ErrorHandler; use crate::db::fixed_data::silo::INTERNAL_SILO_ID; use crate::db::identity::Resource; +use crate::db::model::ExternalIp; +use crate::db::model::IpKind; use crate::db::model::IpPool; use crate::db::model::IpPoolRange; +use crate::db::model::IpPoolResource; +use crate::db::model::IpPoolResourceType; use crate::db::model::IpPoolUpdate; use crate::db::model::Name; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; use crate::db::queries::ip_pool::FilterOverlappingIpRanges; +use crate::db::TransactionError; +use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::result::Error as DieselError; use ipnetwork::IpNetwork; use nexus_types::external_api::shared::IpRange; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -31,6 +39,7 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; +use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; @@ -46,29 +55,110 @@ impl DataStore { opctx: &OpContext, pagparams: &PaginatedBy<'_>, ) -> ListResultVec { - use db::schema::ip_pool::dsl; + use db::schema::ip_pool; + use db::schema::ip_pool_resource; + opctx .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST) .await?; match pagparams { PaginatedBy::Id(pagparams) => { - paginated(dsl::ip_pool, dsl::id, pagparams) + paginated(ip_pool::table, ip_pool::id, pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + ip_pool::table, + ip_pool::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .left_outer_join(ip_pool_resource::table) + .filter( + ip_pool_resource::resource_id + .ne(*INTERNAL_SILO_ID) + // resource_id is not nullable -- null here means the + // pool has no entry in the join table + .or(ip_pool_resource::resource_id.is_null()), + ) + .filter(ip_pool::time_deleted.is_null()) + .select(IpPool::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) 
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// List IP pools linked to the current silo
+    pub async fn silo_ip_pools_list(
+        &self,
+        opctx: &OpContext,
+        pagparams: &PaginatedBy<'_>,
+    ) -> ListResultVec<IpPool> {
+        use db::schema::ip_pool;
+        use db::schema::ip_pool_resource;
+
+        // From the developer user's point of view, we treat IP pools linked to
+        // their silo as silo resources, so they can list them if they can list
+        // silo children
+        let authz_silo =
+            opctx.authn.silo_required().internal_context("listing IP pools")?;
+        opctx.authorize(authz::Action::ListChildren, &authz_silo).await?;
+
+        let silo_id = authz_silo.id();
+
+        match pagparams {
+            PaginatedBy::Id(pagparams) => {
+                paginated(ip_pool::table, ip_pool::id, pagparams)
             }
             PaginatedBy::Name(pagparams) => paginated(
-                dsl::ip_pool,
-                dsl::name,
+                ip_pool::table,
+                ip_pool::name,
                 &pagparams.map_name(|n| Name::ref_cast(n)),
             ),
         }
-        // != excludes nulls so we explicitly include them
-        .filter(dsl::silo_id.ne(*INTERNAL_SILO_ID).or(dsl::silo_id.is_null()))
-        .filter(dsl::time_deleted.is_null())
+        .inner_join(ip_pool_resource::table)
+        .filter(
+            ip_pool_resource::resource_type
+                .eq(IpPoolResourceType::Silo)
+                .and(ip_pool_resource::resource_id.eq(silo_id)),
+        )
+        .filter(ip_pool::time_deleted.is_null())
         .select(db::model::IpPool::as_select())
         .get_results_async(&*self.pool_connection_authorized(opctx).await?)
         .await
         .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }

+    /// Look up whether the given pool is available to users in the current
+    /// silo, i.e., whether there is an entry in the association table linking
+    /// the pool with that silo
+    pub async fn ip_pool_fetch_link(
+        &self,
+        opctx: &OpContext,
+        ip_pool_id: Uuid,
+    ) -> LookupResult<IpPoolResource> {
+        use db::schema::ip_pool;
+        use db::schema::ip_pool_resource;
+
+        let authz_silo = opctx.authn.silo_required().internal_context(
+            "fetching link from an IP pool to current silo",
+        )?;
+
+        ip_pool::table
+            .inner_join(ip_pool_resource::table)
+            .filter(
+                ip_pool_resource::resource_type
+                    .eq(IpPoolResourceType::Silo)
+                    .and(ip_pool_resource::resource_id.eq(authz_silo.id())),
+            )
+            .filter(ip_pool::id.eq(ip_pool_id))
+            .filter(ip_pool::time_deleted.is_null())
+            .select(IpPoolResource::as_select())
+            .first_async::<IpPoolResource>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
     /// Look up the default IP pool for the current silo. If there is no default
     /// at silo scope, fall back to the next level up, namely the fleet default.
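    /// (Note: the query below currently only matches links with
    /// resource_type = Silo for the current silo, so a missing silo default
    /// surfaces as ObjectNotFound; the fleet fallback described here would
    /// come from the resource_type ordering if fleet-scoped links existed.)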
    /// There should always be a default pool at the fleet level, though this
@@ -77,8 +167,9 @@ impl DataStore {
     pub async fn ip_pools_fetch_default(
         &self,
         opctx: &OpContext,
-    ) -> LookupResult<IpPool> {
-        use db::schema::ip_pool::dsl;
+    ) -> LookupResult<(authz::IpPool, IpPool)> {
+        use db::schema::ip_pool;
+        use db::schema::ip_pool_resource;

         let authz_silo_id = opctx.authn.silo_required()?.id();
@@ -91,23 +182,47 @@ impl DataStore {
         //     .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST)
         //     .await?;

-        dsl::ip_pool
-            .filter(dsl::silo_id.eq(authz_silo_id).or(dsl::silo_id.is_null()))
-            .filter(dsl::is_default.eq(true))
-            .filter(dsl::time_deleted.is_null())
-            // this will sort by most specific first, i.e.,
-            //
-            // (silo)
-            // (null)
-            //
-            // then by only taking the first result, we get the most specific one
-            .order(dsl::silo_id.asc().nulls_last())
+        // join ip_pool to ip_pool_resource and filter
+
+        // used in both success and error outcomes
+        let lookup_type = LookupType::ByCompositeId(
+            "Default pool for current silo".to_string(),
+        );
+
+        ip_pool::table
+            .inner_join(ip_pool_resource::table)
+            .filter(
+                ip_pool_resource::resource_type.eq(IpPoolResourceType::Silo),
+            )
+            .filter(ip_pool_resource::resource_id.eq(authz_silo_id))
+            .filter(ip_pool_resource::is_default.eq(true))
+            .filter(ip_pool::time_deleted.is_null())
+            // Order by most specific first so the top result is the most
+            // specific match. resource_type is an enum in the DB and therefore
+            // gets its order from the definition; it's not lexicographic. So
+            // correctness here relies on the types being most-specific-first
+            // in the definition. There are tests for this.
+            .order(ip_pool_resource::resource_type.asc())
             .select(IpPool::as_select())
             .first_async::<IpPool>(
                 &*self.pool_connection_authorized(opctx).await?,
             )
             .await
-            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+            .map_err(|e| {
+                // janky to do this manually, but this is an unusual kind of
+                // lookup in that it is by (silo_id, is_default=true), which is
+                // arguably a composite ID.
+                public_error_from_diesel_lookup(
+                    e,
+                    ResourceType::IpPool,
+                    &lookup_type,
+                )
+            })
+            .map(|ip_pool| {
+                let authz_pool =
+                    authz::IpPool::new(authz::FLEET, ip_pool.id(), lookup_type);
+                (authz_pool, ip_pool)
+            })
     }

     /// Looks up an IP pool intended for internal services.
@@ -117,16 +232,24 @@ impl DataStore {
         &self,
         opctx: &OpContext,
     ) -> LookupResult<(authz::IpPool, IpPool)> {
-        use db::schema::ip_pool::dsl;
+        use db::schema::ip_pool;
+        use db::schema::ip_pool_resource;

         opctx
             .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST)
             .await?;

-        // Look up this IP pool by rack ID.
-        let (authz_pool, pool) = dsl::ip_pool
-            .filter(dsl::silo_id.eq(*INTERNAL_SILO_ID))
-            .filter(dsl::time_deleted.is_null())
+        // Look up IP pool by its association with the internal silo.
+        // We assume there is only one pool for that silo, or at least,
+        // if there is more than one, it doesn't matter which one we pick.
+        let (authz_pool, pool) = ip_pool::table
+            .inner_join(ip_pool_resource::table)
+            .filter(ip_pool::time_deleted.is_null())
+            .filter(
+                ip_pool_resource::resource_type
+                    .eq(IpPoolResourceType::Silo)
+                    .and(ip_pool_resource::resource_id.eq(*INTERNAL_SILO_ID)),
+            )
+            .select(IpPool::as_select())
+            .get_result_async(&*self.pool_connection_authorized(opctx).await?)
            .await
@@ -179,6 +302,7 @@ impl DataStore {
     ) -> DeleteResult {
         use db::schema::ip_pool::dsl;
         use db::schema::ip_pool_range;
+        use db::schema::ip_pool_resource;

         opctx.authorize(authz::Action::Delete, authz_pool).await?;

         // Verify there are no IP ranges still in this pool
@@ -199,15 +323,28 @@ impl DataStore {
             ));
         }

+        // Verify there are no linked silos
+        let silo_link = ip_pool_resource::table
+            .filter(ip_pool_resource::ip_pool_id.eq(authz_pool.id()))
+            .select(ip_pool_resource::resource_id)
+            .limit(1)
+            .first_async::<Uuid>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+        if silo_link.is_some() {
+            return Err(Error::invalid_request(
+                "IP Pool cannot be deleted while it is linked to a silo",
+            ));
+        }
+
         // Delete the pool, conditional on the rcgen not having changed. This
         // protects the delete from occurring if clients created a new IP range
         // in between the above check for children and this query.
         let now = Utc::now();
         let updated_rows = diesel::update(dsl::ip_pool)
-            // != excludes nulls so we explicitly include them
-            .filter(
-                dsl::silo_id.ne(*INTERNAL_SILO_ID).or(dsl::silo_id.is_null()),
-            )
             .filter(dsl::time_deleted.is_null())
             .filter(dsl::id.eq(authz_pool.id()))
             .filter(dsl::rcgen.eq(db_pool.rcgen))
@@ -229,6 +366,36 @@ impl DataStore {
         Ok(())
     }

+    /// Check whether the pool is internal by checking that it exists and is
+    /// associated with the internal silo
+    pub async fn ip_pool_is_internal(
+        &self,
+        opctx: &OpContext,
+        authz_pool: &authz::IpPool,
+    ) -> LookupResult<bool> {
+        use db::schema::ip_pool;
+        use db::schema::ip_pool_resource;
+
+        ip_pool::table
+            .inner_join(ip_pool_resource::table)
+            .filter(ip_pool::id.eq(authz_pool.id()))
+            .filter(
+                ip_pool_resource::resource_type.eq(IpPoolResourceType::Silo),
+            )
+            .filter(ip_pool_resource::resource_id.eq(*INTERNAL_SILO_ID))
+            .filter(ip_pool::time_deleted.is_null())
+            .select(ip_pool::id)
+            .first_async::<Uuid>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .optional()
+            // if there is a result, the pool is associated with the internal silo,
+            // which makes it the internal pool
+            .map(|result| Ok(result.is_some()))
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?
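+        // Shape note: `.optional()` yields Result<Option<Uuid>, _>; mapping
+        // the Ok side to Ok(is_some) and then applying `?` to the outer
+        // Result leaves the inner LookupResult<bool> as the function's value.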
+ } + pub async fn ip_pool_update( &self, opctx: &OpContext, @@ -237,11 +404,8 @@ impl DataStore { ) -> UpdateResult { use db::schema::ip_pool::dsl; opctx.authorize(authz::Action::Modify, authz_pool).await?; + diesel::update(dsl::ip_pool) - // != excludes nulls so we explicitly include them - .filter( - dsl::silo_id.ne(*INTERNAL_SILO_ID).or(dsl::silo_id.is_null()), - ) .filter(dsl::id.eq(authz_pool.id())) .filter(dsl::time_deleted.is_null()) .set(updates) @@ -256,6 +420,296 @@ impl DataStore { }) } + pub async fn ip_pool_silo_list( + &self, + opctx: &OpContext, + authz_pool: &authz::IpPool, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::ip_pool; + use db::schema::ip_pool_resource; + + paginated( + ip_pool_resource::table, + ip_pool_resource::ip_pool_id, + pagparams, + ) + .inner_join(ip_pool::table) + .filter(ip_pool::id.eq(authz_pool.id())) + .filter(ip_pool::time_deleted.is_null()) + .select(IpPoolResource::as_select()) + .load_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn ip_pool_link_silo( + &self, + opctx: &OpContext, + ip_pool_resource: IpPoolResource, + ) -> CreateResult { + use db::schema::ip_pool_resource::dsl; + opctx + .authorize(authz::Action::CreateChild, &authz::IP_POOL_LIST) + .await?; + + diesel::insert_into(dsl::ip_pool_resource) + .values(ip_pool_resource.clone()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::IpPoolResource, + &format!( + "ip_pool_id: {:?}, resource_id: {:?}, resource_type: {:?}", + ip_pool_resource.ip_pool_id, + ip_pool_resource.resource_id, + ip_pool_resource.resource_type, + ) + ), + ) + }) + } + + pub async fn ip_pool_set_default( + &self, + opctx: &OpContext, + authz_ip_pool: &authz::IpPool, + authz_silo: &authz::Silo, + is_default: bool, + ) -> UpdateResult { + use db::schema::ip_pool_resource::dsl; + + opctx.authorize(authz::Action::Modify, authz_ip_pool).await?; + opctx.authorize(authz::Action::Modify, authz_silo).await?; + + let ip_pool_id = authz_ip_pool.id(); + let silo_id = authz_silo.id(); + + let conn = self.pool_connection_authorized(opctx).await?; + + // if we're making is_default false, we can just do that without + // checking any other stuff + if !is_default { + let updated_link = diesel::update(dsl::ip_pool_resource) + .filter(dsl::resource_id.eq(silo_id)) + .filter(dsl::ip_pool_id.eq(ip_pool_id)) + .filter(dsl::resource_type.eq(IpPoolResourceType::Silo)) + .set(dsl::is_default.eq(false)) + .returning(IpPoolResource::as_returning()) + .get_result_async(&*conn) + .await + .map_err(|e| { + Error::internal_error(&format!( + "Transaction error: {:?}", + e + )) + })?; + return Ok(updated_link); + } + + // Errors returned from the below transactions. 
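+        // (TransactionError here wraps either the custom rollback cause
+        // raised inside the transaction body or a plain database error; the
+        // match after the transaction maps each variant to a distinct
+        // external error.)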
+        #[derive(Debug)]
+        enum IpPoolResourceUpdateError {
+            FailedToUnsetDefault(DieselError),
+        }
+        type TxnError = TransactionError<IpPoolResourceUpdateError>;
+
+        conn.transaction_async(|conn| async move {
+            // note this is matching the specified silo, but could be any pool
+            let existing_default_for_silo = dsl::ip_pool_resource
+                .filter(dsl::resource_type.eq(IpPoolResourceType::Silo))
+                .filter(dsl::resource_id.eq(silo_id))
+                .filter(dsl::is_default.eq(true))
+                .select(IpPoolResource::as_select())
+                .get_result_async(&conn)
+                .await;
+
+            // if there is an existing default, we need to unset it before we can
+            // set the new default
+            if let Ok(existing_default) = existing_default_for_silo {
+                // if the pool we're making default is already default for this
+                // silo, don't error: just noop
+                if existing_default.ip_pool_id == ip_pool_id {
+                    return Ok(existing_default);
+                }
+
+                let unset_default = diesel::update(dsl::ip_pool_resource)
+                    .filter(dsl::resource_id.eq(existing_default.resource_id))
+                    .filter(dsl::ip_pool_id.eq(existing_default.ip_pool_id))
+                    .filter(
+                        dsl::resource_type.eq(existing_default.resource_type),
+                    )
+                    .set(dsl::is_default.eq(false))
+                    .execute_async(&conn)
+                    .await;
+                if let Err(e) = unset_default {
+                    return Err(TxnError::CustomError(
+                        IpPoolResourceUpdateError::FailedToUnsetDefault(e),
+                    ));
+                }
+            }
+
+            let updated_link = diesel::update(dsl::ip_pool_resource)
+                .filter(dsl::resource_id.eq(silo_id))
+                .filter(dsl::ip_pool_id.eq(ip_pool_id))
+                .filter(dsl::resource_type.eq(IpPoolResourceType::Silo))
+                .set(dsl::is_default.eq(true))
+                .returning(IpPoolResource::as_returning())
+                .get_result_async(&conn)
+                .await?;
+            Ok(updated_link)
+        })
+        .await
+        .map_err(|e| match e {
+            TransactionError::CustomError(
+                IpPoolResourceUpdateError::FailedToUnsetDefault(e),
+            ) => public_error_from_diesel(e, ErrorHandler::Server),
+            TransactionError::Database(e) => public_error_from_diesel(
+                e,
+                ErrorHandler::NotFoundByLookup(
+                    ResourceType::IpPoolResource,
+                    // TODO: would be nice to put the actual names and/or ids in
+                    // here but LookupType on each of the two silos doesn't have
+                    // a nice to_string yet or a way of composing them
+                    LookupType::ByCompositeId("(pool, silo)".to_string()),
+                ),
+            ),
+        })
+    }
+
+    /// Ephemeral and SNAT IPs are associated with a silo through an instance,
+    /// so in order to see if there are any such IPs outstanding in the given
+    /// silo, we have to join IP -> Instance -> Project -> Silo
+    async fn ensure_no_instance_ips_outstanding(
+        &self,
+        opctx: &OpContext,
+        authz_pool: &authz::IpPool,
+        authz_silo: &authz::Silo,
+    ) -> Result<(), Error> {
+        use db::schema::external_ip;
+        use db::schema::instance;
+        use db::schema::project;
+
+        let existing_ips = external_ip::table
+            .inner_join(
+                instance::table
+                    .on(external_ip::parent_id.eq(instance::id.nullable())),
+            )
+            .inner_join(project::table.on(instance::project_id.eq(project::id)))
+            .filter(external_ip::is_service.eq(false))
+            .filter(external_ip::parent_id.is_not_null())
+            .filter(external_ip::time_deleted.is_null())
+            .filter(external_ip::ip_pool_id.eq(authz_pool.id()))
+            // important, floating IPs are handled separately
+            .filter(external_ip::kind.eq(IpKind::Ephemeral).or(external_ip::kind.eq(IpKind::SNat)))
+            .filter(instance::time_deleted.is_null())
+            // we have to join through IPs to instances to projects to get the silo ID
+            .filter(project::silo_id.eq(authz_silo.id()))
+            .select(ExternalIp::as_select())
+            .limit(1)
+            .load_async::<ExternalIp>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .map_err(|e| {
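+                // (Presumably a 500 rather than a lookup error because
+                // failing to even run this check is a server-side problem,
+                // not a bad request.)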
Error::internal_error(&format!(
                    "error checking for outstanding IPs before deleting IP pool association to resource: {:?}",
                    e
                ))
            })?;
+
+        if !existing_ips.is_empty() {
+            return Err(Error::invalid_request(
+                "IP addresses from this pool are in use in the linked silo",
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Floating IPs are associated with a silo through a project, so this one
+    /// is a little simpler than ephemeral. We join IP -> Project -> Silo.
+    async fn ensure_no_floating_ips_outstanding(
+        &self,
+        opctx: &OpContext,
+        authz_pool: &authz::IpPool,
+        authz_silo: &authz::Silo,
+    ) -> Result<(), Error> {
+        use db::schema::external_ip;
+        use db::schema::project;
+
+        let existing_ips = external_ip::table
+            .inner_join(project::table.on(external_ip::project_id.eq(project::id.nullable())))
+            .filter(external_ip::is_service.eq(false))
+            .filter(external_ip::time_deleted.is_null())
+            // all floating IPs have a project
+            .filter(external_ip::project_id.is_not_null())
+            .filter(external_ip::ip_pool_id.eq(authz_pool.id()))
+            .filter(external_ip::kind.eq(IpKind::Floating))
+            // we have to join through IPs to projects to get the silo ID
+            .filter(project::silo_id.eq(authz_silo.id()))
+            .filter(project::time_deleted.is_null())
+            .select(ExternalIp::as_select())
+            .limit(1)
+            .load_async::<ExternalIp>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .map_err(|e| {
+                Error::internal_error(&format!(
+                    "error checking for outstanding IPs before deleting IP pool association to resource: {:?}",
+                    e
+                ))
+            })?;
+
+        if !existing_ips.is_empty() {
+            return Err(Error::invalid_request(
+                "IP addresses from this pool are in use in the linked silo",
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Delete IP pool association with resource unless there are outstanding
+    /// IPs allocated from the pool in the associated silo
+    pub async fn ip_pool_unlink_silo(
+        &self,
+        opctx: &OpContext,
+        authz_pool: &authz::IpPool,
+        authz_silo: &authz::Silo,
+    ) -> DeleteResult {
+        use db::schema::ip_pool_resource;
+
+        opctx.authorize(authz::Action::Modify, authz_pool).await?;
+        opctx.authorize(authz::Action::Modify, authz_silo).await?;
+
+        // We can only delete the association if there are no IPs allocated
+        // from this pool in the associated resource.
+        self.ensure_no_instance_ips_outstanding(opctx, authz_pool, authz_silo)
+            .await?;
+        self.ensure_no_floating_ips_outstanding(opctx, authz_pool, authz_silo)
+            .await?;
+
+        diesel::delete(ip_pool_resource::table)
+            .filter(ip_pool_resource::ip_pool_id.eq(authz_pool.id()))
+            .filter(ip_pool_resource::resource_id.eq(authz_silo.id()))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
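+            // (The delete is keyed on (ip_pool_id, resource_id); zero rows
+            // deleted is still treated as a successful unlink below.)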
+ .await + .map(|_rows_deleted| ()) + .map_err(|e| { + Error::internal_error(&format!( + "error deleting IP pool association to resource: {:?}", + e + )) + }) + } + pub async fn ip_pool_list_ranges( &self, opctx: &OpContext, @@ -422,12 +876,18 @@ impl DataStore { #[cfg(test)] mod test { + use std::num::NonZeroU32; + + use crate::authz; use crate::db::datastore::datastore_test; - use crate::db::model::IpPool; + use crate::db::model::{IpPool, IpPoolResource, IpPoolResourceType}; use assert_matches::assert_matches; use nexus_test_utils::db::test_setup_database; use nexus_types::identity::Resource; - use omicron_common::api::external::{Error, IdentityMetadataCreateParams}; + use omicron_common::api::external::http_pagination::PaginatedBy; + use omicron_common::api::external::{ + DataPageParams, Error, IdentityMetadataCreateParams, LookupType, + }; use omicron_test_utils::dev; #[tokio::test] @@ -436,83 +896,212 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - // we start out with the default fleet-level pool already created, - // so when we ask for a default silo, we get it back - let fleet_default_pool = - datastore.ip_pools_fetch_default(&opctx).await.unwrap(); + // we start out with no default pool, so we expect not found + let error = datastore.ip_pools_fetch_default(&opctx).await.unwrap_err(); + assert_matches!(error, Error::ObjectNotFound { .. }); - assert_eq!(fleet_default_pool.identity.name.as_str(), "default"); - assert!(fleet_default_pool.is_default); - assert_eq!(fleet_default_pool.silo_id, None); + let pagparams_id = DataPageParams { + marker: None, + limit: NonZeroU32::new(100).unwrap(), + direction: dropshot::PaginationOrder::Ascending, + }; + let pagbyid = PaginatedBy::Id(pagparams_id); + + let all_pools = datastore + .ip_pools_list(&opctx, &pagbyid) + .await + .expect("Should list IP pools"); + assert_eq!(all_pools.len(), 0); + let silo_pools = datastore + .silo_ip_pools_list(&opctx, &pagbyid) + .await + .expect("Should list silo IP pools"); + assert_eq!(silo_pools.len(), 0); - // unique index prevents second fleet-level default + let authz_silo = opctx.authn.silo_required().unwrap(); + let silo_id = authz_silo.id(); + + // create a non-default pool for the silo let identity = IdentityMetadataCreateParams { - name: "another-fleet-default".parse().unwrap(), + name: "pool1-for-silo".parse().unwrap(), description: "".to_string(), }; - let err = datastore - .ip_pool_create( - &opctx, - IpPool::new(&identity, None, /*default= */ true), - ) + let pool1_for_silo = datastore + .ip_pool_create(&opctx, IpPool::new(&identity)) .await - .expect_err("Failed to fail to create a second default fleet pool"); - assert_matches!(err, Error::ObjectAlreadyExists { .. 
}); + .expect("Failed to create IP pool"); - // when we fetch the default pool for a silo, if those scopes do not - // have a default IP pool, we will still get back the fleet default + // shows up in full list but not silo list + let all_pools = datastore + .ip_pools_list(&opctx, &pagbyid) + .await + .expect("Should list IP pools"); + assert_eq!(all_pools.len(), 1); + let silo_pools = datastore + .silo_ip_pools_list(&opctx, &pagbyid) + .await + .expect("Should list silo IP pools"); + assert_eq!(silo_pools.len(), 0); - let silo_id = opctx.authn.silo_required().unwrap().id(); + // make default should fail when there is no link yet + let authz_pool = authz::IpPool::new( + authz::FLEET, + pool1_for_silo.id(), + LookupType::ById(pool1_for_silo.id()), + ); + let error = datastore + .ip_pool_set_default(&opctx, &authz_pool, &authz_silo, true) + .await + .expect_err("Should not be able to make non-existent link default"); + assert_matches!(error, Error::ObjectNotFound { .. }); - // create a non-default pool for the silo - let identity = IdentityMetadataCreateParams { - name: "non-default-for-silo".parse().unwrap(), - description: "".to_string(), + // now link to silo + let link_body = IpPoolResource { + ip_pool_id: pool1_for_silo.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, }; datastore - .ip_pool_create( - &opctx, - IpPool::new(&identity, Some(silo_id), /*default= */ false), - ) + .ip_pool_link_silo(&opctx, link_body.clone()) .await - .expect("Failed to create silo non-default IP pool"); + .expect("Failed to associate IP pool with silo"); // because that one was not a default, when we ask for the silo default - // pool, we still get the fleet default - let ip_pool = datastore - .ip_pools_fetch_default(&opctx) + // pool, we still get nothing + let error = datastore.ip_pools_fetch_default(&opctx).await.unwrap_err(); + assert_matches!(error, Error::ObjectNotFound { .. }); + + // now it shows up in the silo list + let silo_pools = datastore + .silo_ip_pools_list(&opctx, &pagbyid) .await - .expect("Failed to get silo default IP pool"); - assert_eq!(ip_pool.id(), fleet_default_pool.id()); + .expect("Should list silo IP pools"); + assert_eq!(silo_pools.len(), 1); + assert_eq!(silo_pools[0].id(), pool1_for_silo.id()); - // now create a default pool for the silo - let identity = IdentityMetadataCreateParams { - name: "default-for-silo".parse().unwrap(), - description: "".to_string(), - }; + // linking an already linked silo errors due to PK conflict + let err = datastore + .ip_pool_link_silo(&opctx, link_body) + .await + .expect_err("Creating the same link again should conflict"); + assert_matches!(err, Error::ObjectAlreadyExists { .. 
});
+
+        // now make it default
+        datastore
+            .ip_pool_set_default(&opctx, &authz_pool, &authz_silo, true)
+            .await
+            .expect("Should be able to make pool default");
+
+        // setting default if already default is allowed
         datastore
-            .ip_pool_create(&opctx, IpPool::new(&identity, Some(silo_id), true))
+            .ip_pool_set_default(&opctx, &authz_pool, &authz_silo, true)
             .await
-            .expect("Failed to create silo default IP pool");
+            .expect("Should be able to make pool default again");

-        // now when we ask for the default pool, we get the one we just made
-        let ip_pool = datastore
+        // now when we ask for the default pool again, we get that one
+        let (authz_pool1_for_silo, ip_pool) = datastore
             .ip_pools_fetch_default(&opctx)
             .await
             .expect("Failed to get silo's default IP pool");
-        assert_eq!(ip_pool.name().as_str(), "default-for-silo");
+        assert_eq!(ip_pool.name().as_str(), "pool1-for-silo");

         // and we can't create a second default pool for the silo
         let identity = IdentityMetadataCreateParams {
             name: "second-default-for-silo".parse().unwrap(),
             description: "".to_string(),
         };
+        let second_silo_default = datastore
+            .ip_pool_create(&opctx, IpPool::new(&identity))
+            .await
+            .expect("Failed to create pool");
         let err = datastore
-            .ip_pool_create(&opctx, IpPool::new(&identity, Some(silo_id), true))
+            .ip_pool_link_silo(
+                &opctx,
+                IpPoolResource {
+                    ip_pool_id: second_silo_default.id(),
+                    resource_type: IpPoolResourceType::Silo,
+                    resource_id: silo_id,
+                    is_default: true,
+                },
+            )
             .await
-            .expect_err("Failed to fail to create second default pool");
+            .expect_err("Failed to fail to set a second default pool for silo");
         assert_matches!(err, Error::ObjectAlreadyExists { .. });

+        // now remove the association and we should get nothing again
+        let authz_silo =
+            authz::Silo::new(authz::FLEET, silo_id, LookupType::ById(silo_id));
+        datastore
+            .ip_pool_unlink_silo(&opctx, &authz_pool1_for_silo, &authz_silo)
+            .await
+            .expect("Failed to unlink IP pool from silo");
+
+        // no default
+        let error = datastore.ip_pools_fetch_default(&opctx).await.unwrap_err();
+        assert_matches!(error, Error::ObjectNotFound { ..
}); + + // and silo pools list is empty again + let silo_pools = datastore + .silo_ip_pools_list(&opctx, &pagbyid) + .await + .expect("Should list silo IP pools"); + assert_eq!(silo_pools.len(), 0); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_internal_ip_pool() { + let logctx = dev::test_setup_log("test_internal_ip_pool"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // confirm internal pool appears as internal + let (authz_pool, _pool) = + datastore.ip_pools_service_lookup(&opctx).await.unwrap(); + + let is_internal = + datastore.ip_pool_is_internal(&opctx, &authz_pool).await; + assert_eq!(is_internal, Ok(true)); + + // another random pool should not be considered internal + let identity = IdentityMetadataCreateParams { + name: "other-pool".parse().unwrap(), + description: "".to_string(), + }; + let other_pool = datastore + .ip_pool_create(&opctx, IpPool::new(&identity)) + .await + .expect("Failed to create IP pool"); + + let authz_other_pool = authz::IpPool::new( + authz::FLEET, + other_pool.id(), + LookupType::ById(other_pool.id()), + ); + let is_internal = + datastore.ip_pool_is_internal(&opctx, &authz_other_pool).await; + assert_eq!(is_internal, Ok(false)); + + // now link it to the current silo, and it is still not internal + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: other_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: true, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Failed to make IP pool default for silo"); + + let is_internal = + datastore.ip_pool_is_internal(&opctx, &authz_other_pool).await; + assert_eq!(is_internal, Ok(false)); + db.cleanup().await.unwrap(); logctx.cleanup_successful(); } diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs index a9015ea943..e3927fdfc1 100644 --- a/nexus/db-queries/src/db/datastore/project.rs +++ b/nexus/db-queries/src/db/datastore/project.rs @@ -347,34 +347,4 @@ impl DataStore { ) }) } - - /// List IP Pools accessible to a project - pub async fn project_ip_pools_list( - &self, - opctx: &OpContext, - authz_project: &authz::Project, - pagparams: &PaginatedBy<'_>, - ) -> ListResultVec { - use db::schema::ip_pool::dsl; - opctx.authorize(authz::Action::ListChildren, authz_project).await?; - match pagparams { - PaginatedBy::Id(pagparams) => { - paginated(dsl::ip_pool, dsl::id, pagparams) - } - PaginatedBy::Name(pagparams) => paginated( - dsl::ip_pool, - dsl::name, - &pagparams.map_name(|n| Name::ref_cast(n)), - ), - } - // TODO(2148, 2056): filter only pools accessible by the given - // project, once specific projects for pools are implemented - // != excludes nulls so we explicitly include them - .filter(dsl::silo_id.ne(*INTERNAL_SILO_ID).or(dsl::silo_id.is_null())) - .filter(dsl::time_deleted.is_null()) - .select(db::model::IpPool::as_select()) - .get_results_async(&*self.pool_connection_authorized(opctx).await?) 
-        .await
-        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
-    }
 }
diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs
index 728da0b0d1..50bae03c2d 100644
--- a/nexus/db-queries/src/db/datastore/rack.rs
+++ b/nexus/db-queries/src/db/datastore/rack.rs
@@ -753,36 +753,37 @@ impl DataStore {
         self.rack_insert(opctx, &db::model::Rack::new(rack_id)).await?;

-        let internal_pool = db::model::IpPool::new(
-            &IdentityMetadataCreateParams {
+        let internal_pool =
+            db::model::IpPool::new(&IdentityMetadataCreateParams {
                 name: SERVICE_IP_POOL_NAME.parse::<Name>().unwrap(),
                 description: String::from("IP Pool for Oxide Services"),
-            },
-            Some(*INTERNAL_SILO_ID),
-            true, // default for internal silo
-        );
+            });

-        self.ip_pool_create(opctx, internal_pool).await.map(|_| ()).or_else(
-            |e| match e {
-                Error::ObjectAlreadyExists { .. } => Ok(()),
-                _ => Err(e),
-            },
-        )?;
+        let internal_pool_id = internal_pool.id();

-        let default_pool = db::model::IpPool::new(
-            &IdentityMetadataCreateParams {
-                name: "default".parse::<Name>().unwrap(),
-                description: String::from("default IP pool"),
-            },
-            None, // no silo ID, fleet scoped
-            true, // default for fleet
-        );
-        self.ip_pool_create(opctx, default_pool).await.map(|_| ()).or_else(
-            |e| match e {
-                Error::ObjectAlreadyExists { .. } => Ok(()),
+        let internal_created = self
+            .ip_pool_create(opctx, internal_pool)
+            .await
+            .map(|_| true)
+            .or_else(|e| match e {
+                Error::ObjectAlreadyExists { .. } => Ok(false),
                 _ => Err(e),
-            },
-        )?;
+            })?;
+
+        // make default for the internal silo. only need to do this if
+        // the create went through, i.e., if it wasn't already there
+        if internal_created {
+            self.ip_pool_link_silo(
+                opctx,
+                db::model::IpPoolResource {
+                    ip_pool_id: internal_pool_id,
+                    resource_type: db::model::IpPoolResourceType::Silo,
+                    resource_id: *INTERNAL_SILO_ID,
+                    is_default: true,
+                },
+            )
+            .await?;
+        }

         Ok(())
     }
@@ -1329,7 +1330,7 @@ mod test {
         // been allocated as a part of the service IP pool.
         let (.., svc_pool) =
             datastore.ip_pools_service_lookup(&opctx).await.unwrap();
-        assert_eq!(svc_pool.silo_id, Some(*INTERNAL_SILO_ID));
+        assert_eq!(svc_pool.name().as_str(), "oxide-service-pool");

         let observed_ip_pool_ranges = get_all_ip_pool_ranges(&datastore).await;
         assert_eq!(observed_ip_pool_ranges.len(), 1);
@@ -1531,7 +1532,7 @@
         // allocated as a part of the service IP pool.
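        // (With the silo_id column gone from ip_pool, the service pool is
        // found via its ip_pool_resource link to INTERNAL_SILO_ID, hence the
        // assertion on the pool name rather than the old silo_id field.)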
let (.., svc_pool) = datastore.ip_pools_service_lookup(&opctx).await.unwrap(); - assert_eq!(svc_pool.silo_id, Some(*INTERNAL_SILO_ID)); + assert_eq!(svc_pool.name().as_str(), "oxide-service-pool"); let observed_ip_pool_ranges = get_all_ip_pool_ranges(&datastore).await; assert_eq!(observed_ip_pool_ranges.len(), 1); diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index 6fb951de84..090c6865b7 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -48,6 +48,7 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ "identity_type", "instance_state", "ip_kind", + "ip_pool_resource_type", "network_interface_kind", "physical_disk_kind", "producer_kind", diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 2a76ea7408..49403aac61 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -833,6 +833,8 @@ mod tests { use async_bb8_diesel::AsyncRunQueryDsl; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use dropshot::test_util::LogContext; + use nexus_db_model::IpPoolResource; + use nexus_db_model::IpPoolResourceType; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::shared::IpRange; use omicron_common::address::NUM_SOURCE_NAT_PORTS; @@ -870,34 +872,34 @@ mod tests { Self { logctx, opctx, db, db_datastore } } + /// Create pool, associate with current silo async fn create_ip_pool( &self, name: &str, range: IpRange, is_default: bool, ) { - let silo_id = self.opctx.authn.silo_required().unwrap().id(); - let pool = IpPool::new( - &IdentityMetadataCreateParams { - name: String::from(name).parse().unwrap(), - description: format!("ip pool {}", name), - }, - Some(silo_id), - is_default, - ); + let pool = IpPool::new(&IdentityMetadataCreateParams { + name: String::from(name).parse().unwrap(), + description: format!("ip pool {}", name), + }); - let conn = self - .db_datastore - .pool_connection_authorized(&self.opctx) + self.db_datastore + .ip_pool_create(&self.opctx, pool.clone()) .await - .unwrap(); + .expect("Failed to create IP pool"); - use crate::db::schema::ip_pool::dsl as ip_pool_dsl; - diesel::insert_into(ip_pool_dsl::ip_pool) - .values(pool.clone()) - .execute_async(&*conn) + let silo_id = self.opctx.authn.silo_required().unwrap().id(); + let association = IpPoolResource { + resource_id: silo_id, + resource_type: IpPoolResourceType::Silo, + ip_pool_id: pool.id(), + is_default, + }; + self.db_datastore + .ip_pool_link_silo(&self.opctx, association) .await - .expect("Failed to create IP Pool"); + .expect("Failed to associate IP pool with silo"); self.initialize_ip_pool(name, range).await; } @@ -936,7 +938,7 @@ mod tests { } async fn default_pool_id(&self) -> Uuid { - let pool = self + let (.., pool) = self .db_datastore .ip_pools_fetch_default(&self.opctx) .await @@ -960,7 +962,7 @@ mod tests { Ipv4Addr::new(10, 0, 0, 1), )) .unwrap(); - context.initialize_ip_pool("default", range).await; + context.create_ip_pool("default", range, true).await; for first_port in (0..super::MAX_PORT).step_by(NUM_SOURCE_NAT_PORTS.into()) { @@ -1015,7 +1017,7 @@ mod tests { Ipv4Addr::new(10, 0, 0, 1), )) .unwrap(); - context.initialize_ip_pool("default", range).await; + context.create_ip_pool("default", range, true).await; // Allocate an Ephemeral IP, which should take the entire port range of // the only address in the pool. 
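    // (An ephemeral IP claims every port on its address, while the SNAT
    // allocations in the earlier step_by(NUM_SOURCE_NAT_PORTS) loop take
    // fixed-size chunks, so the pool's single address is used up at once.)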
@@ -1098,7 +1100,7 @@ mod tests { Ipv4Addr::new(10, 0, 0, 3), )) .unwrap(); - context.initialize_ip_pool("default", range).await; + context.create_ip_pool("default", range, true).await; // TODO-completeness: Implementing Iterator for IpRange would be nice. let addresses = [ @@ -1199,7 +1201,7 @@ mod tests { Ipv4Addr::new(10, 0, 0, 3), )) .unwrap(); - context.initialize_ip_pool("default", range).await; + context.create_ip_pool("default", range, true).await; let instance_id = Uuid::new_v4(); let id = Uuid::new_v4(); @@ -1659,7 +1661,7 @@ mod tests { Ipv4Addr::new(10, 0, 0, 3), )) .unwrap(); - context.initialize_ip_pool("default", range).await; + context.create_ip_pool("default", range, true).await; // Create one SNAT IP address. let instance_id = Uuid::new_v4(); @@ -1721,13 +1723,13 @@ mod tests { Ipv4Addr::new(10, 0, 0, 3), )) .unwrap(); - context.initialize_ip_pool("default", first_range).await; + context.create_ip_pool("default", first_range, true).await; let second_range = IpRange::try_from(( Ipv4Addr::new(10, 0, 0, 4), Ipv4Addr::new(10, 0, 0, 6), )) .unwrap(); - context.create_ip_pool("p1", second_range, /*default*/ false).await; + context.create_ip_pool("p1", second_range, false).await; // Allocating an address on an instance in the second pool should be // respected, even though there are IPs available in the first. @@ -1765,12 +1767,12 @@ mod tests { Ipv4Addr::new(10, 0, 0, 3), )) .unwrap(); - context.initialize_ip_pool("default", first_range).await; + context.create_ip_pool("default", first_range, true).await; let first_address = Ipv4Addr::new(10, 0, 0, 4); let last_address = Ipv4Addr::new(10, 0, 0, 6); let second_range = IpRange::try_from((first_address, last_address)).unwrap(); - context.create_ip_pool("p1", second_range, /* default */ false).await; + context.create_ip_pool("p1", second_range, false).await; // Allocate all available addresses in the second pool. 
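    // (These allocations target p1 explicitly, so exhausting its three
    // addresses should not spill over into the default pool.)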
let instance_id = Uuid::new_v4(); diff --git a/nexus/src/app/ip_pool.rs b/nexus/src/app/ip_pool.rs index 5efdaf7b6f..1d9b3e515e 100644 --- a/nexus/src/app/ip_pool.rs +++ b/nexus/src/app/ip_pool.rs @@ -7,14 +7,14 @@ use crate::external_api::params; use crate::external_api::shared::IpRange; use ipnetwork::IpNetwork; -use nexus_db_model::IpPool; use nexus_db_queries::authz; +use nexus_db_queries::authz::ApiResource; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; -use nexus_db_queries::db::fixed_data::silo::INTERNAL_SILO_ID; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::Name; +use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -26,9 +26,22 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use ref_cast::RefCast; +use uuid::Uuid; -fn is_internal(pool: &IpPool) -> bool { - pool.silo_id == Some(*INTERNAL_SILO_ID) +/// Helper to make it easier to 404 on attempts to manipulate internal pools +fn not_found_from_lookup(pool_lookup: &lookup::IpPool<'_>) -> Error { + match pool_lookup { + lookup::IpPool::Name(_, name) => { + Error::not_found_by_name(ResourceType::IpPool, &name) + } + lookup::IpPool::OwnedName(_, name) => { + Error::not_found_by_name(ResourceType::IpPool, &name) + } + lookup::IpPool::PrimaryKey(_, id) => { + Error::not_found_by_id(ResourceType::IpPool, &id) + } + lookup::IpPool::Error(_, error) => error.to_owned(), + } } impl super::Nexus { @@ -56,24 +69,112 @@ impl super::Nexus { opctx: &OpContext, pool_params: ¶ms::IpPoolCreate, ) -> CreateResult { - let silo_id = match pool_params.clone().silo { - Some(silo) => { - let (.., authz_silo) = self - .silo_lookup(&opctx, silo)? 
- .lookup_for(authz::Action::Read) - .await?; - Some(authz_silo.id()) - } - _ => None, - }; - let pool = db::model::IpPool::new( - &pool_params.identity, - silo_id, - pool_params.is_default, - ); + let pool = db::model::IpPool::new(&pool_params.identity); self.db_datastore.ip_pool_create(opctx, pool).await } + /// List IP pools in current silo + pub(crate) async fn silo_ip_pools_list( + &self, + opctx: &OpContext, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + self.db_datastore.silo_ip_pools_list(opctx, pagparams).await + } + + // Look up pool by name or ID, but only return it if it's linked to the + // current silo + pub async fn silo_ip_pool_fetch<'a>( + &'a self, + opctx: &'a OpContext, + pool: &'a NameOrId, + ) -> LookupResult { + let (authz_pool, pool) = + self.ip_pool_lookup(opctx, pool)?.fetch().await?; + + // 404 if no link is found in the current silo + let link = self.db_datastore.ip_pool_fetch_link(opctx, pool.id()).await; + if link.is_err() { + return Err(authz_pool.not_found()); + } + + Ok(pool) + } + + pub(crate) async fn ip_pool_silo_list( + &self, + opctx: &OpContext, + pool_lookup: &lookup::IpPool<'_>, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + let (.., authz_pool) = + pool_lookup.lookup_for(authz::Action::ListChildren).await?; + self.db_datastore.ip_pool_silo_list(opctx, &authz_pool, pagparams).await + } + + pub(crate) async fn ip_pool_link_silo( + &self, + opctx: &OpContext, + pool_lookup: &lookup::IpPool<'_>, + silo_link: ¶ms::IpPoolSiloLink, + ) -> CreateResult { + let (authz_pool,) = + pool_lookup.lookup_for(authz::Action::Modify).await?; + let (authz_silo,) = self + .silo_lookup(&opctx, silo_link.silo.clone())? + .lookup_for(authz::Action::Modify) + .await?; + self.db_datastore + .ip_pool_link_silo( + opctx, + db::model::IpPoolResource { + ip_pool_id: authz_pool.id(), + resource_type: db::model::IpPoolResourceType::Silo, + resource_id: authz_silo.id(), + is_default: silo_link.is_default, + }, + ) + .await + } + + pub(crate) async fn ip_pool_unlink_silo( + &self, + opctx: &OpContext, + pool_lookup: &lookup::IpPool<'_>, + silo_lookup: &lookup::Silo<'_>, + ) -> DeleteResult { + let (.., authz_pool) = + pool_lookup.lookup_for(authz::Action::Modify).await?; + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Modify).await?; + + self.db_datastore + .ip_pool_unlink_silo(opctx, &authz_pool, &authz_silo) + .await + } + + pub(crate) async fn ip_pool_silo_update( + &self, + opctx: &OpContext, + pool_lookup: &lookup::IpPool<'_>, + silo_lookup: &lookup::Silo<'_>, + update: ¶ms::IpPoolSiloUpdate, + ) -> CreateResult { + let (.., authz_pool) = + pool_lookup.lookup_for(authz::Action::Modify).await?; + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Modify).await?; + + self.db_datastore + .ip_pool_set_default( + opctx, + &authz_pool, + &authz_silo, + update.is_default, + ) + .await + } + pub(crate) async fn ip_pools_list( &self, opctx: &OpContext, @@ -89,6 +190,13 @@ impl super::Nexus { ) -> DeleteResult { let (.., authz_pool, db_pool) = pool_lookup.fetch_for(authz::Action::Delete).await?; + + let is_internal = + self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; + if is_internal { + return Err(not_found_from_lookup(pool_lookup)); + } + self.db_datastore.ip_pool_delete(opctx, &authz_pool, &db_pool).await } @@ -100,6 +208,13 @@ impl super::Nexus { ) -> UpdateResult { let (.., authz_pool) = pool_lookup.lookup_for(authz::Action::Modify).await?; + + let is_internal = + self.db_datastore.ip_pool_is_internal(opctx, 
&authz_pool).await?; + if is_internal { + return Err(not_found_from_lookup(pool_lookup)); + } + self.db_datastore .ip_pool_update(opctx, &authz_pool, updates.clone().into()) .await @@ -111,13 +226,13 @@ impl super::Nexus { pool_lookup: &lookup::IpPool<'_>, pagparams: &DataPageParams<'_, IpNetwork>, ) -> ListResultVec { - let (.., authz_pool, db_pool) = - pool_lookup.fetch_for(authz::Action::ListChildren).await?; - if is_internal(&db_pool) { - return Err(Error::not_found_by_name( - ResourceType::IpPool, - &db_pool.identity.name, - )); + let (.., authz_pool) = + pool_lookup.lookup_for(authz::Action::ListChildren).await?; + + let is_internal = + self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; + if is_internal { + return Err(not_found_from_lookup(pool_lookup)); } self.db_datastore @@ -131,13 +246,13 @@ impl super::Nexus { pool_lookup: &lookup::IpPool<'_>, range: &IpRange, ) -> UpdateResult { - let (.., authz_pool, db_pool) = + let (.., authz_pool, _db_pool) = pool_lookup.fetch_for(authz::Action::Modify).await?; - if is_internal(&db_pool) { - return Err(Error::not_found_by_name( - ResourceType::IpPool, - &db_pool.identity.name, - )); + + let is_internal = + self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; + if is_internal { + return Err(not_found_from_lookup(pool_lookup)); } self.db_datastore.ip_pool_add_range(opctx, &authz_pool, range).await } @@ -148,14 +263,16 @@ impl super::Nexus { pool_lookup: &lookup::IpPool<'_>, range: &IpRange, ) -> DeleteResult { - let (.., authz_pool, db_pool) = + let (.., authz_pool, _db_pool) = pool_lookup.fetch_for(authz::Action::Modify).await?; - if is_internal(&db_pool) { - return Err(Error::not_found_by_name( - ResourceType::IpPool, - &db_pool.identity.name, - )); + + let is_internal = + self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; + + if is_internal { + return Err(not_found_from_lookup(pool_lookup)); } + self.db_datastore.ip_pool_delete_range(opctx, &authz_pool, range).await } diff --git a/nexus/src/app/project.rs b/nexus/src/app/project.rs index 6e8727a889..2e852ba2d3 100644 --- a/nexus/src/app/project.rs +++ b/nexus/src/app/project.rs @@ -8,7 +8,6 @@ use crate::app::sagas; use crate::external_api::params; use crate::external_api::shared; use anyhow::Context; -use nexus_db_model::Name; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; @@ -24,7 +23,6 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; -use ref_cast::RefCast; use std::sync::Arc; impl super::Nexus { @@ -149,40 +147,4 @@ impl super::Nexus { .collect::, _>>()?; Ok(shared::Policy { role_assignments }) } - - pub(crate) async fn project_ip_pools_list( - &self, - opctx: &OpContext, - project_lookup: &lookup::Project<'_>, - pagparams: &PaginatedBy<'_>, - ) -> ListResultVec { - let (.., authz_project) = - project_lookup.lookup_for(authz::Action::ListChildren).await?; - - self.db_datastore - .project_ip_pools_list(opctx, &authz_project, pagparams) - .await - } - - pub fn project_ip_pool_lookup<'a>( - &'a self, - opctx: &'a OpContext, - pool: &'a NameOrId, - _project_lookup: &Option>, - ) -> LookupResult> { - // TODO(2148, 2056): check that the given project has access (if one - // is provided to the call) once that relation is implemented - match pool { - NameOrId::Name(name) => { - let pool = LookupPath::new(opctx, &self.db_datastore) - 
.ip_pool_name(Name::ref_cast(name)); - Ok(pool) - } - NameOrId::Id(id) => { - let pool = - LookupPath::new(opctx, &self.db_datastore).ip_pool_id(*id); - Ok(pool) - } - } - } } diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index ab62977746..9d52ec1501 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -834,10 +834,8 @@ pub(crate) mod test { use diesel::{ ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, }; - use dropshot::test_util::ClientTestContext; use nexus_db_queries::context::OpContext; use nexus_db_queries::{authn::saga::Serialized, db::datastore::DataStore}; - use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; @@ -853,12 +851,6 @@ pub(crate) mod test { const DISK_NAME: &str = "my-disk"; const PROJECT_NAME: &str = "springfield-squidport"; - async fn create_org_and_project(client: &ClientTestContext) -> Uuid { - create_ip_pool(&client, "p0", None, None).await; - let project = create_project(client, PROJECT_NAME).await; - project.identity.id - } - pub fn new_disk_create_params() -> params::DiskCreate { params::DiskCreate { identity: IdentityMetadataCreateParams { @@ -896,7 +888,8 @@ pub(crate) mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx().nexus; - let project_id = create_org_and_project(&client).await; + let project_id = + create_project(&client, PROJECT_NAME).await.identity.id; // Build the saga DAG with the provided test parameters let opctx = test_opctx(cptestctx); @@ -1065,7 +1058,8 @@ pub(crate) mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx().nexus; - let project_id = create_org_and_project(&client).await; + let project_id = + create_project(&client, PROJECT_NAME).await.identity.id; let opctx = test_opctx(cptestctx); crate::app::sagas::test_helpers::action_failure_can_unwind::< @@ -1094,7 +1088,8 @@ pub(crate) mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx.nexus; - let project_id = create_org_and_project(&client).await; + let project_id = + create_project(&client, PROJECT_NAME).await.identity.id; let opctx = test_opctx(&cptestctx); crate::app::sagas::test_helpers::action_failure_can_unwind_idempotently::< @@ -1134,7 +1129,8 @@ pub(crate) mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx.nexus; - let project_id = create_org_and_project(&client).await; + let project_id = + create_project(&client, PROJECT_NAME).await.identity.id; // Build the saga DAG with the provided test parameters let opctx = test_opctx(&cptestctx); diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index f791d289db..333e6c1672 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -184,29 +184,20 @@ pub(crate) mod test { app::saga::create_saga_dag, app::sagas::disk_delete::Params, app::sagas::disk_delete::SagaDiskDelete, }; - use dropshot::test_util::ClientTestContext; use nexus_db_model::Disk; use nexus_db_queries::authn::saga::Serialized; use nexus_db_queries::context::OpContext; - use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use 
omicron_common::api::external::Name; - use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; const PROJECT_NAME: &str = "springfield-squidport"; - async fn create_org_and_project(client: &ClientTestContext) -> Uuid { - create_ip_pool(&client, "p0", None, None).await; - let project = create_project(client, PROJECT_NAME).await; - project.identity.id - } - pub fn test_opctx(cptestctx: &ControlPlaneTestContext) -> OpContext { OpContext::for_tests( cptestctx.logctx.log.new(o!()), @@ -242,7 +233,7 @@ pub(crate) mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx.nexus; - let project_id = create_org_and_project(&client).await; + let project_id = create_project(client, PROJECT_NAME).await.identity.id; let disk = create_disk(&cptestctx).await; // Build the saga DAG with the provided test parameters @@ -268,7 +259,7 @@ pub(crate) mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx.nexus; - let project_id = create_org_and_project(&client).await; + let project_id = create_project(client, PROJECT_NAME).await.identity.id; let disk = create_disk(&cptestctx).await; // Build the saga DAG with the provided test parameters diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index fd86e2052a..c4c9c4e083 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -564,7 +564,7 @@ async fn sic_allocate_instance_snat_ip( let instance_id = sagactx.lookup::("instance_id")?; let ip_id = sagactx.lookup::("snat_ip_id")?; - let pool = datastore + let (.., pool) = datastore .ip_pools_fetch_default(&opctx) .await .map_err(ActionError::action_failed)?; @@ -909,9 +909,9 @@ pub mod test { use nexus_db_queries::authn::saga::Serialized; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::DataStore; + use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_project; - use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ @@ -930,7 +930,7 @@ pub mod test { const DISK_NAME: &str = "my-disk"; async fn create_org_project_and_disk(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; create_disk(&client, PROJECT_NAME, DISK_NAME).await; project.identity.id diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 7802312b10..013bececee 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -178,9 +178,9 @@ mod test { use nexus_db_queries::{ authn::saga::Serialized, context::OpContext, db, db::lookup::LookupPath, }; + use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_project; - use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::identity::Resource; @@ -199,7 +199,7 @@ mod test { const DISK_NAME: &str = "my-disk"; async fn create_org_project_and_disk(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; + 
create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; create_disk(&client, PROJECT_NAME, DISK_NAME).await; project.identity.id diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 7a417a5781..29c189efb4 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -501,10 +501,10 @@ mod tests { use camino::Utf8Path; use dropshot::test_util::ClientTestContext; use nexus_test_interface::NexusServer; - use nexus_test_utils::{ - resource_helpers::{create_project, object_create, populate_ip_pool}, - start_sled_agent, + use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_project, object_create, }; + use nexus_test_utils::start_sled_agent; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ ByteCount, IdentityMetadataCreateParams, InstanceCpuCount, @@ -520,7 +520,7 @@ mod tests { const INSTANCE_NAME: &str = "test-instance"; async fn setup_test_project(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(&client, PROJECT_NAME).await; project.identity.id } diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index e6717b0164..8957a838e7 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -734,7 +734,7 @@ mod test { use dropshot::test_util::ClientTestContext; use nexus_db_queries::authn; use nexus_test_utils::resource_helpers::{ - create_project, object_create, populate_ip_pool, + create_default_ip_pool, create_project, object_create, }; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ @@ -751,7 +751,7 @@ mod test { const INSTANCE_NAME: &str = "test-instance"; async fn setup_test_project(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(&client, PROJECT_NAME).await; project.identity.id } diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index c3fe6fc327..ed8c8ccebf 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1554,7 +1554,6 @@ mod test { use crate::app::saga::create_saga_dag; use crate::app::sagas::test_helpers; - use crate::external_api::shared::IpRange; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::{ ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, @@ -1563,12 +1562,11 @@ mod test { use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::DataStore; + use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; - use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::delete_disk; use nexus_test_utils::resource_helpers::object_create; - use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params::InstanceDiskAttachment; @@ -1580,7 +1578,6 @@ mod test { use omicron_common::api::external::NameOrId; use sled_agent_client::types::CrucibleOpts; use sled_agent_client::TestInterfaces as SledAgentTestInterfaces; - use std::net::Ipv4Addr; use 
std::str::FromStr; #[test] @@ -1785,8 +1782,10 @@ mod test { const DISK_NAME: &str = "disky-mcdiskface"; const INSTANCE_NAME: &str = "base-instance"; - async fn create_org_project_and_disk(client: &ClientTestContext) -> Uuid { - create_ip_pool(&client, "p0", None, None).await; + async fn create_project_and_disk_and_pool( + client: &ClientTestContext, + ) -> Uuid { + create_default_ip_pool(&client).await; create_project(client, PROJECT_NAME).await; create_disk(client, PROJECT_NAME, DISK_NAME).await.identity.id } @@ -1833,7 +1832,7 @@ mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx().nexus; - let disk_id = create_org_project_and_disk(&client).await; + let disk_id = create_project_and_disk_and_pool(&client).await; // Build the saga DAG with the provided test parameters let opctx = test_opctx(cptestctx); @@ -2022,7 +2021,7 @@ mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx().nexus; - let disk_id = create_org_project_and_disk(&client).await; + let disk_id = create_project_and_disk_and_pool(&client).await; // Build the saga DAG with the provided test parameters let opctx = test_opctx(&cptestctx); @@ -2040,24 +2039,6 @@ mod test { // before the first attempt to run the saga recreates it. delete_disk(client, PROJECT_NAME, DISK_NAME).await; - // The no-pantry variant of the test needs to see the disk attached to - // an instance. Set up an IP pool so that instances can be created - // against it. - if !use_the_pantry { - populate_ip_pool( - &client, - "default", - Some( - IpRange::try_from(( - Ipv4Addr::new(10, 1, 0, 0), - Ipv4Addr::new(10, 1, 255, 255), - )) - .unwrap(), - ), - ) - .await; - } - crate::app::sagas::test_helpers::action_failure_can_unwind::< SagaSnapshotCreate, _, @@ -2182,7 +2163,7 @@ mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx().nexus; - let disk_id = create_org_project_and_disk(&client).await; + let disk_id = create_project_and_disk_and_pool(&client).await; // Build the saga DAG with the provided test parameters let opctx = test_opctx(cptestctx); @@ -2291,7 +2272,7 @@ mod test { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx().nexus; - let disk_id = create_org_project_and_disk(&client).await; + let disk_id = create_project_and_disk_and_pool(&client).await; // Build the saga DAG with the provided test parameters let opctx = test_opctx(cptestctx); @@ -2352,19 +2333,6 @@ mod test { assert!(output.is_err()); // Attach the disk to an instance, then rerun the saga - populate_ip_pool( - &client, - "default", - Some( - IpRange::try_from(( - Ipv4Addr::new(10, 1, 0, 0), - Ipv4Addr::new(10, 1, 255, 255), - )) - .unwrap(), - ), - ) - .await; - let instance_state = setup_test_instance( cptestctx, client, diff --git a/nexus/src/app/sagas/vpc_create.rs b/nexus/src/app/sagas/vpc_create.rs index 4b5bedf41e..6b48e4087a 100644 --- a/nexus/src/app/sagas/vpc_create.rs +++ b/nexus/src/app/sagas/vpc_create.rs @@ -455,8 +455,8 @@ pub(crate) mod test { db::datastore::DataStore, db::fixed_data::vpc::SERVICES_VPC_ID, db::lookup::LookupPath, }; + use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_project; - use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Name; @@ -469,7 +469,7 @@ pub(crate) mod test { const PROJECT_NAME: &str = 
"springfield-squidport"; async fn create_org_and_project(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; project.identity.id } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 5ac782aee6..21acb45ed3 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -6,13 +6,10 @@ use super::{ console_api, device_auth, params, - params::{ProjectSelector, UninitializedSledId}, - shared::UninitializedSled, views::{ self, Certificate, Group, IdentityProvider, Image, IpPool, IpPoolRange, - PhysicalDisk, Project, Rack, Role, Silo, SiloQuotas, SiloUtilization, - Sled, SledInstance, Snapshot, SshKey, Switch, User, UserBuiltin, Vpc, - VpcRouter, VpcSubnet, + PhysicalDisk, Project, Rack, Role, Silo, SiloUtilization, Sled, + Snapshot, SshKey, User, UserBuiltin, Vpc, VpcRouter, VpcSubnet, }, }; use crate::external_api::shared; @@ -40,15 +37,13 @@ use dropshot::{ use ipnetwork::IpNetwork; use nexus_db_queries::authz; use nexus_db_queries::db; -use nexus_db_queries::db::identity::AssetIdentityMetadata; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; -use nexus_db_queries::{ - authz::ApiResource, db::fixed_data::silo::INTERNAL_SILO_ID, -}; +use nexus_types::external_api::views::SiloQuotas; use nexus_types::external_api::views::Utilization; +use nexus_types::identity::AssetIdentityMetadata; use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; @@ -124,6 +119,10 @@ pub(crate) fn external_api() -> NexusApiDescription { // Operator-Accessible IP Pools API api.register(ip_pool_list)?; api.register(ip_pool_create)?; + api.register(ip_pool_silo_list)?; + api.register(ip_pool_silo_link)?; + api.register(ip_pool_silo_unlink)?; + api.register(ip_pool_silo_update)?; api.register(ip_pool_view)?; api.register(ip_pool_delete)?; api.register(ip_pool_update)?; @@ -1294,7 +1293,7 @@ async fn project_policy_update( // IP Pools -/// List all IP pools that can be used by a given project +/// List all IP pools #[endpoint { method = GET, path = "/v1/ip-pools", @@ -1302,14 +1301,8 @@ async fn project_policy_update( }] async fn project_ip_pool_list( rqctx: RequestContext>, - query_params: Query>, + query_params: Query, ) -> Result>, HttpError> { - // Per https://github.com/oxidecomputer/omicron/issues/2148 - // This is currently the same list as /v1/system/ip-pools, that is to say, - // IP pools that are *available to* a given project, those being ones that - // are not the internal pools for Oxide service usage. 
This may change - // in the future as the scoping of pools is further developed, but for now, - // this is literally a near-duplicate of `ip_pool_list`: let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -1318,10 +1311,8 @@ async fn project_ip_pool_list( let scan_params = ScanByNameOrId::from_query(&query)?; let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_lookup = - nexus.project_lookup(&opctx, scan_params.selector.clone())?; let pools = nexus - .project_ip_pools_list(&opctx, &project_lookup, &paginated_by) + .silo_ip_pools_list(&opctx, &paginated_by) .await? .into_iter() .map(IpPool::from) @@ -1344,28 +1335,13 @@ async fn project_ip_pool_list( async fn project_ip_pool_view( rqctx: RequestContext>, path_params: Path, - project: Query, ) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let nexus = &apictx.nexus; let pool_selector = path_params.into_inner().pool; - let project_lookup = if let Some(project) = project.into_inner().project - { - Some(nexus.project_lookup(&opctx, ProjectSelector { project })?) - } else { - None - }; - let (authz_pool, pool) = nexus - .project_ip_pool_lookup(&opctx, &pool_selector, &project_lookup)? - .fetch() - .await?; - // TODO(2148): once we've actualy implemented filtering to pools belonging to - // the specified project, we can remove this internal check. - if pool.silo_id == Some(*INTERNAL_SILO_ID) { - return Err(authz_pool.not_found().into()); - } + let pool = nexus.silo_ip_pool_fetch(&opctx, &pool_selector).await?; Ok(HttpResponseOk(IpPool::from(pool))) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await @@ -1445,6 +1421,8 @@ async fn ip_pool_view( let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let nexus = &apictx.nexus; let pool_selector = path_params.into_inner().pool; + // We do not prevent the service pool from being fetched by name or ID + // like we do for update, delete, associate. let (.., pool) = nexus.ip_pool_lookup(&opctx, &pool_selector)?.fetch().await?; Ok(HttpResponseOk(IpPool::from(pool))) @@ -1498,6 +1476,128 @@ async fn ip_pool_update( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// List an IP pool's linked silos +#[endpoint { + method = GET, + path = "/v1/system/ip-pools/{pool}/silos", + tags = ["system/networking"], +}] +async fn ip_pool_silo_list( + rqctx: RequestContext>, + path_params: Path, + // paginating by resource_id because they're unique per pool. most robust + // option would be to paginate by a composite key representing the (pool, + // resource_type, resource) + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + + let path = path_params.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + + let assocs = nexus + .ip_pool_silo_list(&opctx, &pool_lookup, &pag_params) + .await? 
+ .into_iter() + .map(|assoc| assoc.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + assocs, + &|_, x: &views::IpPoolSilo| x.silo_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Make an IP pool available within a silo +#[endpoint { + method = POST, + path = "/v1/system/ip-pools/{pool}/silos", + tags = ["system/networking"], +}] +async fn ip_pool_silo_link( + rqctx: RequestContext>, + path_params: Path, + resource_assoc: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let resource_assoc = resource_assoc.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let assoc = nexus + .ip_pool_link_silo(&opctx, &pool_lookup, &resource_assoc) + .await?; + Ok(HttpResponseCreated(assoc.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Unlink an IP pool from a silo +/// +/// Will fail if there are any outstanding IPs allocated in the silo. +#[endpoint { + method = DELETE, + path = "/v1/system/ip-pools/{pool}/silos/{silo}", + tags = ["system/networking"], +}] +async fn ip_pool_silo_unlink( + rqctx: RequestContext>, + path_params: Path, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + nexus.ip_pool_unlink_silo(&opctx, &pool_lookup, &silo_lookup).await?; + Ok(HttpResponseUpdatedNoContent()) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Make an IP pool default or not-default for a silo +/// +/// When a pool is made default for a silo, any existing default will remain +/// linked to the silo, but will no longer be the default. 
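Taken together, the list/link/unlink endpoints above and the update endpoint declared just below give each pool/silo pair a small lifecycle. A minimal sketch using the test helpers added later in this patch, assuming `client`, a hypothetical pool named "pool1", and a `silo_id: Uuid` are in scope:

    // Link the pool to the silo without making it the default.
    let link = params::IpPoolSiloLink {
        silo: NameOrId::Id(silo_id),
        is_default: false,
    };
    let _: views::IpPoolSilo =
        object_create(client, "/v1/system/ip-pools/pool1/silos", &link).await;

    // Promote it to the silo's default. Per the doc comment above, any
    // previous default stays linked but loses its default flag.
    let silo_url = format!("/v1/system/ip-pools/pool1/silos/{}", silo_id);
    let _: views::IpPoolSilo = object_put(
        client,
        &silo_url,
        &params::IpPoolSiloUpdate { is_default: true },
    )
    .await;

    // Unlink. This fails with a 400 while any IPs from the pool are
    // still allocated in the silo.
    object_delete(client, &silo_url).await;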
+#[endpoint { + method = PUT, + path = "/v1/system/ip-pools/{pool}/silos/{silo}", + tags = ["system/networking"], +}] +async fn ip_pool_silo_update( + rqctx: RequestContext>, + path_params: Path, + update: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let update = update.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let assoc = nexus + .ip_pool_silo_update(&opctx, &pool_lookup, &silo_lookup, &update) + .await?; + Ok(HttpResponseOk(assoc.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// Fetch the IP pool used for Oxide services #[endpoint { method = GET, @@ -4660,7 +4760,7 @@ async fn rack_view( async fn sled_list_uninitialized( rqctx: RequestContext>, query: Query>, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); // We don't actually support real pagination let pag_params = query.into_inner(); @@ -4691,7 +4791,7 @@ async fn sled_list_uninitialized( }] async fn sled_add( rqctx: RequestContext>, - sled: TypedBody, + sled: TypedBody, ) -> Result { let apictx = rqctx.context(); let nexus = &apictx.nexus; @@ -4805,7 +4905,7 @@ async fn sled_instance_list( rqctx: RequestContext>, path_params: Path, query_params: Query, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -4826,7 +4926,7 @@ async fn sled_instance_list( Ok(HttpResponseOk(ScanById::results_page( &query, sled_instances, - &|_, sled_instance: &SledInstance| sled_instance.identity.id, + &|_, sled_instance: &views::SledInstance| sled_instance.identity.id, )?)) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await @@ -4875,7 +4975,7 @@ async fn physical_disk_list( async fn switch_list( rqctx: RequestContext>, query_params: Query, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -4890,7 +4990,7 @@ async fn switch_list( Ok(HttpResponseOk(ScanById::results_page( &query, switches, - &|_, switch: &Switch| switch.identity.id, + &|_, switch: &views::Switch| switch.identity.id, )?)) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await @@ -4905,7 +5005,7 @@ async fn switch_list( async fn switch_view( rqctx: RequestContext>, path_params: Path, -) -> Result, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index c72c7ad780..c2516a1509 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -12,6 +12,7 @@ use dropshot::test_util::ClientTestContext; use dropshot::HttpErrorResponseBody; use dropshot::Method; use http::StatusCode; +use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_test_interface::NexusServer; use nexus_types::external_api::params; use nexus_types::external_api::params::PhysicalDiskKind; @@ -26,6 +27,7 @@ use nexus_types::external_api::views::IpPool; use nexus_types::external_api::views::IpPoolRange; use nexus_types::external_api::views::User; use nexus_types::external_api::views::{Project, Silo, Vpc, VpcRouter}; +use 
nexus_types::identity::Resource; use nexus_types::internal_api::params as internal_params; use nexus_types::internal_api::params::Baseboard; use omicron_common::api::external::ByteCount; @@ -55,6 +57,41 @@ where .unwrap() } +pub async fn object_get<OutputType>( + client: &ClientTestContext, + path: &str, +) -> OutputType +where + OutputType: serde::de::DeserializeOwned, +{ + NexusRequest::object_get(client, path) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap_or_else(|e| { + panic!("failed to make \"GET\" request to {path}: {e}") + }) + .parsed_body() + .unwrap() +} + +pub async fn object_get_error( + client: &ClientTestContext, + path: &str, + status: StatusCode, +) -> HttpErrorResponseBody { + NexusRequest::new( + RequestBuilder::new(client, Method::GET, path) + .expect_status(Some(status)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::<HttpErrorResponseBody>() + .unwrap() +} + pub async fn object_create( client: &ClientTestContext, path: &str, @@ -68,13 +105,36 @@ where .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .unwrap_or_else(|_| { - panic!("failed to make \"create\" request to {path}") + .unwrap_or_else(|e| { + panic!("failed to make \"POST\" request to {path}: {e}") }) .parsed_body() .unwrap() } +/// Make a POST, assert status code, return error response body +pub async fn object_create_error<InputType>( + client: &ClientTestContext, + path: &str, + input: &InputType, + status: StatusCode, +) -> HttpErrorResponseBody +where + InputType: serde::Serialize, +{ + NexusRequest::new( + RequestBuilder::new(client, Method::POST, path) + .body(Some(&input)) + .expect_status(Some(status)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::<HttpErrorResponseBody>() + .unwrap() +} + pub async fn object_put( client: &ClientTestContext, path: &str, @@ -88,41 +148,60 @@ where .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .unwrap_or_else(|_| panic!("failed to make \"PUT\" request to {path}")) + .unwrap_or_else(|e| { + panic!("failed to make \"PUT\" request to {path}: {e}") + }) .parsed_body() .unwrap() } +pub async fn object_put_error<InputType>( + client: &ClientTestContext, + path: &str, + input: &InputType, + status: StatusCode, +) -> HttpErrorResponseBody +where + InputType: serde::Serialize, +{ + NexusRequest::new( + RequestBuilder::new(client, Method::PUT, path) + .body(Some(&input)) + .expect_status(Some(status)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::<HttpErrorResponseBody>() + .unwrap() +} + pub async fn object_delete(client: &ClientTestContext, path: &str) { NexusRequest::object_delete(client, path) .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .unwrap_or_else(|_| { - panic!("failed to make \"delete\" request to {path}") + .unwrap_or_else(|e| { + panic!("failed to make \"DELETE\" request to {path}: {e}") }); } -pub async fn populate_ip_pool( +pub async fn object_delete_error( client: &ClientTestContext, - pool_name: &str, - ip_range: Option<IpRange>, -) -> IpPoolRange { - let ip_range = ip_range.unwrap_or_else(|| { - use std::net::Ipv4Addr; - IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 0), - Ipv4Addr::new(10, 0, 255, 255), - )) - .unwrap() - }); - let range = object_create( - client, - format!("/v1/system/ip-pools/{}/ranges/add", pool_name).as_str(), - &ip_range, + path: &str, + status: StatusCode, +) -> HttpErrorResponseBody { + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, path) + .expect_status(Some(status)), ) - .await; - range + .authn_as(AuthnMode::PrivilegedUser) + 
.execute() + .await + .unwrap() + .parsed_body::<HttpErrorResponseBody>() + .unwrap() } /// Create an IP pool with a single range for testing. @@ -134,7 +213,6 @@ pub async fn create_ip_pool( client: &ClientTestContext, pool_name: &str, ip_range: Option<IpRange>, - silo: Option<Uuid>, ) -> (IpPool, IpPoolRange) { let pool = object_create( client, @@ -144,15 +222,47 @@ name: pool_name.parse().unwrap(), description: String::from("an ip pool"), }, - silo: silo.map(|id| NameOrId::Id(id)), - is_default: false, }, ) .await; + + let ip_range = ip_range.unwrap_or_else(|| { + use std::net::Ipv4Addr; + IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 0), + Ipv4Addr::new(10, 0, 255, 255), + )) + .unwrap() + }); + let url = format!("/v1/system/ip-pools/{}/ranges/add", pool_name); + let range = object_create(client, &url, &ip_range).await; (pool, range) } +pub async fn link_ip_pool( + client: &ClientTestContext, + pool_name: &str, + silo_id: &Uuid, + is_default: bool, +) { + let link = + params::IpPoolSiloLink { silo: NameOrId::Id(*silo_id), is_default }; + let url = format!("/v1/system/ip-pools/{pool_name}/silos"); + object_create::<params::IpPoolSiloLink, views::IpPoolSilo>( + client, &url, &link, + ) + .await; +} + +/// What you want for any test that is not testing IP logic specifically +pub async fn create_default_ip_pool( + client: &ClientTestContext, +) -> views::IpPool { + let (pool, ..) = create_ip_pool(&client, "default", None).await; + link_ip_pool(&client, "default", &DEFAULT_SILO.id(), true).await; + pool +} + pub async fn create_floating_ip( client: &ClientTestContext, fip_name: &str, diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index a7c9c99509..b9023a8212 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -18,12 +18,12 @@ use nexus_test_utils::http_testing::Collection; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::identity_eq; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_instance_with; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; @@ -95,8 +95,8 @@ fn get_disk_detach_url(instance: &NameOrId) -> String { } } -async fn create_org_and_project(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; +async fn create_project_and_pool(client: &ClientTestContext) -> Uuid { + create_default_ip_pool(client).await; let project = create_project(client, PROJECT_NAME).await; project.identity.id } @@ -107,7 +107,7 @@ async fn test_disk_not_found_before_creation( ) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); // List disks. There aren't any yet.
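The GET-side helpers added to resource_helpers above are not exercised in the hunks that follow, but their intended usage is symmetrical with the POST/PUT/DELETE variants. A sketch with an illustrative path; the "not found" text mirrors the message pattern asserted elsewhere in this patch:

    // Fetch and deserialize a view as the privileged user.
    let pool: views::IpPool =
        object_get(client, "/v1/system/ip-pools/default").await;

    // Or assert that a fetch fails with a particular status and get
    // back the parsed error body.
    let error = object_get_error(
        client,
        "/v1/system/ip-pools/no-such-pool",
        StatusCode::NOT_FOUND,
    )
    .await;
    assert!(error.message.starts_with("not found"));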
@@ -186,7 +186,7 @@ async fn test_disk_create_attach_detach_delete( ) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - let project_id = create_org_and_project(client).await; + let project_id = create_project_and_pool(client).await; let nexus = &cptestctx.server.apictx().nexus; let disks_url = get_disks_url(); @@ -315,7 +315,7 @@ async fn test_disk_create_disk_that_already_exists_fails( ) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); // Create a disk. @@ -360,7 +360,7 @@ async fn test_disk_create_disk_that_already_exists_fails( async fn test_disk_slot_assignment(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let nexus = &cptestctx.server.apictx().nexus; let disk_names = ["a", "b", "c", "d"]; @@ -467,7 +467,7 @@ async fn test_disk_move_between_instances(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; let disks_url = get_disks_url(); // Create a disk. @@ -670,7 +670,7 @@ async fn test_disk_creation_region_requested_then_started( ) { let client = &cptestctx.external_client; let test = DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Before we create a disk, set the response from the Crucible Agent: // no matter what regions get requested, they'll always *start* as @@ -689,7 +689,7 @@ async fn test_disk_region_creation_failure( ) { let client = &cptestctx.external_client; let test = DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Before we create a disk, set the response from the Crucible Agent: // no matter what regions get requested, they'll always fail. @@ -745,7 +745,7 @@ async fn test_disk_invalid_block_size_rejected( ) { let client = &cptestctx.external_client; let _test = DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Attempt to allocate the disk, observe a server error. let disk_size = ByteCount::from_gibibytes_u32(3); @@ -788,7 +788,7 @@ async fn test_disk_reject_total_size_not_divisible_by_block_size( ) { let client = &cptestctx.external_client; let _test = DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Attempt to allocate the disk, observe a server error. 
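An aside on the pattern here: with the error helpers added to resource_helpers in this patch, these negative disk-create checks condense to a few lines. A sketch, where `new_disk` stands in for any `params::DiskCreate` with an invalid size:

    // Expect a 400 from disk creation and get the parsed error body;
    // object_create_error asserts the status code internally.
    let error = object_create_error(
        client,
        &get_disks_url(),
        &new_disk,
        StatusCode::BAD_REQUEST,
    )
    .await;
    // error.message can then be matched against the expected text.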
let disk_size = ByteCount::from(3 * 1024 * 1024 * 1024 + 256); @@ -829,7 +829,7 @@ async fn test_disk_reject_total_size_less_than_min_disk_size_bytes( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disk_size = ByteCount::from(MIN_DISK_SIZE_BYTES / 2); @@ -871,7 +871,7 @@ async fn test_disk_reject_total_size_greater_than_max_disk_size_bytes( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disk_size = ByteCount::try_from(MAX_DISK_SIZE_BYTES + (1 << 30)).unwrap(); @@ -916,7 +916,7 @@ async fn test_disk_reject_total_size_not_divisible_by_min_disk_size( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disk_size = ByteCount::from(1024 * 1024 * 1024 + 512); @@ -971,7 +971,7 @@ async fn test_disk_backed_by_multiple_region_sets( test.add_zpool_with_dataset(cptestctx, 10).await; test.add_zpool_with_dataset(cptestctx, 10).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Ask for a 20 gibibyte disk. let disk_size = ByteCount::from_gibibytes_u32(20); @@ -1004,7 +1004,7 @@ async fn test_disk_backed_by_multiple_region_sets( async fn test_disk_too_big(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); @@ -1044,7 +1044,7 @@ async fn test_disk_virtual_provisioning_collection( let _test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(client).await; let project_id1 = create_project(client, PROJECT_NAME).await.identity.id; let project_id2 = create_project(client, PROJECT_NAME_2).await.identity.id; @@ -1252,8 +1252,7 @@ async fn test_disk_virtual_provisioning_collection_failed_delete( let disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - let project_id1 = create_project(client, PROJECT_NAME).await.identity.id; + let project_id1 = create_project_and_pool(client).await; let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); @@ -1393,7 +1392,6 @@ async fn test_phantom_disk_rename(cptestctx: &ControlPlaneTestContext) { let _disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; let _project_id1 = create_project(client, PROJECT_NAME).await.identity.id; // Create a 1 GB disk @@ -1519,7 +1517,7 @@ async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); - create_org_and_project(client).await; + create_project_and_pool(client).await; // Total occupied size should start at 0 for zpool in &test.zpools { @@ -1688,7 +1686,7 @@ async fn test_multiple_disks_multiple_zpools( test.add_zpool_with_dataset(cptestctx, 10).await; test.add_zpool_with_dataset(cptestctx, 10).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Ask for a 10 gibibyte disk, this should succeed let disk_size = ByteCount::from_gibibytes_u32(10); @@ -1765,7 +1763,7 @@ async fn test_disk_metrics(cptestctx: 
&ControlPlaneTestContext) { let oximeter = &cptestctx.oximeter; let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - let project_id = create_org_and_project(client).await; + let project_id = create_project_and_pool(client).await; let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await; oximeter.force_collect().await; @@ -1838,7 +1836,7 @@ async fn test_disk_metrics_paginated(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk(&client, PROJECT_NAME, DISK_NAME).await; create_instance_with_disk(client).await; @@ -1900,7 +1898,7 @@ async fn test_disk_metrics_paginated(cptestctx: &ControlPlaneTestContext) { async fn test_disk_create_for_importing(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); let new_disk = params::DiskCreate { @@ -1943,7 +1941,7 @@ async fn test_project_delete_disk_no_auth_idempotent( ) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Create a disk let disks_url = get_disks_url(); diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 8708083124..b7b838ca50 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -599,7 +599,7 @@ pub static DEMO_IMAGE_CREATE: Lazy = // IP Pools pub static DEMO_IP_POOLS_PROJ_URL: Lazy = - Lazy::new(|| format!("/v1/ip-pools?project={}", *DEMO_PROJECT_NAME)); + Lazy::new(|| "/v1/ip-pools".to_string()); pub const DEMO_IP_POOLS_URL: &'static str = "/v1/system/ip-pools"; pub static DEMO_IP_POOL_NAME: Lazy = Lazy::new(|| "default".parse().unwrap()); @@ -609,8 +609,6 @@ pub static DEMO_IP_POOL_CREATE: Lazy = name: DEMO_IP_POOL_NAME.clone(), description: String::from("an IP pool"), }, - silo: None, - is_default: true, }); pub static DEMO_IP_POOL_PROJ_URL: Lazy = Lazy::new(|| { format!( @@ -627,6 +625,19 @@ pub static DEMO_IP_POOL_UPDATE: Lazy = description: Some(String::from("a new IP pool")), }, }); +pub static DEMO_IP_POOL_SILOS_URL: Lazy = + Lazy::new(|| format!("{}/silos", *DEMO_IP_POOL_URL)); +pub static DEMO_IP_POOL_SILOS_BODY: Lazy = + Lazy::new(|| params::IpPoolSiloLink { + silo: NameOrId::Id(DEFAULT_SILO.identity().id), + is_default: true, // necessary for demo instance create to go through + }); + +pub static DEMO_IP_POOL_SILO_URL: Lazy = + Lazy::new(|| format!("{}/silos/{}", *DEMO_IP_POOL_URL, *DEMO_SILO_NAME)); +pub static DEMO_IP_POOL_SILO_UPDATE_BODY: Lazy = + Lazy::new(|| params::IpPoolSiloUpdate { is_default: false }); + pub static DEMO_IP_POOL_RANGE: Lazy = Lazy::new(|| { IpRange::V4( Ipv4Range::new( @@ -980,6 +991,26 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + // IP pool silos endpoint + VerifyEndpoint { + url: &DEMO_IP_POOL_SILOS_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get, + AllowedMethod::Post(serde_json::to_value(&*DEMO_IP_POOL_SILOS_BODY).unwrap()), + ], + }, + VerifyEndpoint { + url: &DEMO_IP_POOL_SILO_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Delete, + 
AllowedMethod::Put(serde_json::to_value(&*DEMO_IP_POOL_SILO_UPDATE_BODY).unwrap()), + ], + }, + // IP Pool ranges endpoint VerifyEndpoint { url: &DEMO_IP_POOL_RANGES_URL, diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index daec8e2064..3b6127ceb1 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -12,19 +12,26 @@ use dropshot::test_util::ClientTestContext; use dropshot::HttpErrorResponseBody; use http::Method; use http::StatusCode; +use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_floating_ip; use nexus_test_utils::resource_helpers::create_instance_with; use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::create_silo; -use nexus_test_utils::resource_helpers::populate_ip_pool; +use nexus_test_utils::resource_helpers::link_ip_pool; +use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::object_create_error; +use nexus_test_utils::resource_helpers::object_delete; +use nexus_test_utils::resource_helpers::object_delete_error; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::shared; use nexus_types::external_api::views::FloatingIp; +use nexus_types::identity::Resource; use omicron_common::address::IpRange; use omicron_common::address::Ipv4Range; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -59,7 +66,7 @@ pub fn get_floating_ip_by_id_url(fip_id: &Uuid) -> String { async fn test_floating_ip_access(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; // Create a floating IP from the default pool. @@ -106,12 +113,15 @@ async fn test_floating_ip_access(cptestctx: &ControlPlaneTestContext) { async fn test_floating_ip_create(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; + // automatically linked to current silo + create_default_ip_pool(&client).await; + let other_pool_range = IpRange::V4( Ipv4Range::new(Ipv4Addr::new(10, 1, 0, 1), Ipv4Addr::new(10, 1, 0, 5)) .unwrap(), ); - create_ip_pool(&client, "other-pool", Some(other_pool_range), None).await; + // not automatically linked to currently silo. see below + create_ip_pool(&client, "other-pool", Some(other_pool_range)).await; let project = create_project(client, PROJECT_NAME).await; @@ -146,16 +156,27 @@ async fn test_floating_ip_create(cptestctx: &ControlPlaneTestContext) { assert_eq!(fip.instance_id, None); assert_eq!(fip.ip, ip_addr); - // Create with no chosen IP from fleet-scoped named pool. 
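A note on the error bodies these tests assert against: the parsed `HttpErrorResponseBody` carries both a machine-readable code and a human-readable message, and the silo-visibility failures below check one or both. A sketch, reusing the `url` and `params` defined in the surrounding test:

    // Both fields of the dropshot error body are useful here:
    // error_code for the class of failure, message for the specifics.
    let error =
        object_create_error(client, &url, &params, StatusCode::NOT_FOUND)
            .await;
    assert_eq!(error.error_code.unwrap(), "ObjectNotFound");
    assert_eq!(error.message, "not found: ip-pool with name \"other-pool\"");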
+ // Creating with other-pool fails with 404 until it is linked to the current silo let fip_name = FIP_NAMES[2]; - let fip = create_floating_ip( - client, - fip_name, - project.identity.name.as_str(), - None, - Some("other-pool"), - ) - .await; + let params = params::FloatingIpCreate { + identity: IdentityMetadataCreateParams { + name: fip_name.parse().unwrap(), + description: String::from("a floating ip"), + }, + address: None, + pool: Some(NameOrId::Name("other-pool".parse().unwrap())), + }; + let url = format!("/v1/floating-ips?project={}", project.identity.name); + let error = + object_create_error(client, &url, ¶ms, StatusCode::NOT_FOUND).await; + assert_eq!(error.message, "not found: ip-pool with name \"other-pool\""); + + // now link the pool and everything should work with the exact same params + let silo_id = DEFAULT_SILO.id(); + link_ip_pool(&client, "other-pool", &silo_id, false).await; + + // Create with no chosen IP from named pool. + let fip: FloatingIp = object_create(client, &url, ¶ms).await; assert_eq!(fip.identity.name.as_str(), fip_name); assert_eq!(fip.project_id, project.identity.id); assert_eq!(fip.instance_id, None); @@ -184,8 +205,6 @@ async fn test_floating_ip_create_fails_in_other_silo_pool( ) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; - let project = create_project(client, PROJECT_NAME).await; // Create other silo and pool linked to that silo @@ -200,13 +219,8 @@ async fn test_floating_ip_create_fails_in_other_silo_pool( Ipv4Range::new(Ipv4Addr::new(10, 2, 0, 1), Ipv4Addr::new(10, 2, 0, 5)) .unwrap(), ); - create_ip_pool( - &client, - "external-silo-pool", - Some(other_pool_range), - Some(other_silo.identity.id), - ) - .await; + create_ip_pool(&client, "external-silo-pool", Some(other_pool_range)).await; + // don't link pool to silo yet let fip_name = FIP_NAMES[4]; @@ -223,14 +237,19 @@ async fn test_floating_ip_create_fails_in_other_silo_pool( pool: Some(NameOrId::Name("external-silo-pool".parse().unwrap())), }; - let error = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &url) - .body(Some(&body)) - .expect_status(Some(StatusCode::NOT_FOUND)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() - .await; + let error = + object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await; + assert_eq!( + error.message, + "not found: ip-pool with name \"external-silo-pool\"" + ); + + // error is the same after linking the pool to the other silo + link_ip_pool(&client, "external-silo-pool", &other_silo.identity.id, false) + .await; + + let error = + object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await; assert_eq!( error.message, "not found: ip-pool with name \"external-silo-pool\"" @@ -243,7 +262,7 @@ async fn test_floating_ip_create_ip_in_use( ) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; let contested_ip = "10.0.0.0".parse().unwrap(); @@ -291,7 +310,7 @@ async fn test_floating_ip_create_name_in_use( ) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; let contested_name = FIP_NAMES[0]; @@ -340,7 +359,7 @@ async fn test_floating_ip_create_name_in_use( async fn test_floating_ip_delete(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - 
populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; let fip = create_floating_ip( @@ -352,15 +371,24 @@ async fn test_floating_ip_delete(cptestctx: &ControlPlaneTestContext) { ) .await; + // unlink fails because there are outstanding IPs + let silo_id = DEFAULT_SILO.id(); + let silo_link_url = + format!("/v1/system/ip-pools/default/silos/{}", silo_id); + let error = + object_delete_error(client, &silo_link_url, StatusCode::BAD_REQUEST) + .await; + assert_eq!( + error.message, + "IP addresses from this pool are in use in the linked silo" + ); + // Delete the floating IP. - NexusRequest::object_delete( - client, - &get_floating_ip_by_id_url(&fip.identity.id), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); + let floating_ip_url = get_floating_ip_by_id_url(&fip.identity.id); + object_delete(client, &floating_ip_url).await; + + // now unlink works + object_delete(client, &silo_link_url).await; } #[nexus_test] @@ -369,7 +397,7 @@ async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { let apictx = &cptestctx.server.apictx(); let nexus = &apictx.nexus; - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let project = create_project(client, PROJECT_NAME).await; let fip = create_floating_ip( diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 44b65fa67b..99ef165188 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -20,15 +20,20 @@ use nexus_test_interface::NexusServer; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_floating_ip; use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_local_user; use nexus_test_utils::resource_helpers::create_silo; use nexus_test_utils::resource_helpers::grant_iam; +use nexus_test_utils::resource_helpers::link_ip_pool; use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::object_create_error; +use nexus_test_utils::resource_helpers::object_delete; +use nexus_test_utils::resource_helpers::object_delete_error; +use nexus_test_utils::resource_helpers::object_put; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils::start_sled_agent; use nexus_types::external_api::shared::IpKind; @@ -102,10 +107,9 @@ fn default_vpc_subnets_url() -> String { format!("/v1/vpc-subnets?{}&vpc=default", get_project_selector()) } -async fn create_org_and_project(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; - let project = create_project(client, PROJECT_NAME).await; - project.identity.id +async fn create_project_and_pool(client: &ClientTestContext) -> views::Project { + create_default_ip_pool(client).await; + create_project(client, PROJECT_NAME).await } #[nexus_test] @@ -163,8 +167,7 @@ async fn test_instances_access_before_create_returns_not_found( async fn test_instance_access(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - 
populate_ip_pool(&client, "default", None).await; - let project = create_project(client, PROJECT_NAME).await; + let project = create_project_and_pool(client).await; // Create an instance. let instance_name = "test-instance"; @@ -212,7 +215,7 @@ async fn test_instances_create_reboot_halt( let nexus = &apictx.nexus; let instance_name = "just-rainsticks"; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create an instance. let instance_url = get_instance_url(instance_name); @@ -538,7 +541,7 @@ async fn test_instance_start_creates_networking_state( ); } - create_org_and_project(&client).await; + create_project_and_pool(&client).await; let instance_url = get_instance_url(instance_name); let instance = create_instance(client, PROJECT_NAME, instance_name).await; @@ -639,7 +642,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - create_org_and_project(&client).await; + create_project_and_pool(&client).await; let instance_url = get_instance_url(instance_name); // Explicitly create an instance with no disks. Simulated sled agent assumes @@ -743,8 +746,8 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { } // Set up the project and test instance. - populate_ip_pool(&client, "default", None).await; - create_project(client, PROJECT_NAME).await; + create_project_and_pool(client).await; + let instance = nexus_test_utils::resource_helpers::create_instance_with( client, PROJECT_NAME, @@ -879,7 +882,7 @@ async fn test_instance_failed_after_sled_agent_error( let instance_name = "losing-is-fun"; // Create and start the test instance. - create_org_and_project(&client).await; + create_project_and_pool(&client).await; let instance_url = get_instance_url(instance_name); let instance = create_instance(client, PROJECT_NAME, instance_name).await; instance_simulate(nexus, &instance.identity.id).await; @@ -1010,8 +1013,8 @@ async fn test_instance_metrics(cptestctx: &ControlPlaneTestContext) { let datastore = nexus.datastore(); // Create an IP pool and project that we'll use for testing. - populate_ip_pool(&client, "default", None).await; - let project_id = create_project(&client, PROJECT_NAME).await.identity.id; + let project = create_project_and_pool(&client).await; + let project_id = project.identity.id; // Query the view of these metrics stored within CRDB let opctx = @@ -1101,7 +1104,8 @@ async fn test_instance_metrics_with_migration( .await .unwrap(); - let project_id = create_org_and_project(&client).await; + let project = create_project_and_pool(&client).await; + let project_id = project.identity.id; let instance_url = get_instance_url(instance_name); // Explicitly create an instance with no disks. Simulated sled agent assumes @@ -1210,7 +1214,7 @@ async fn test_instances_create_stopped_start( let nexus = &apictx.nexus; let instance_name = "just-rainsticks"; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create an instance in a stopped state. let instance: Instance = object_create( @@ -1260,7 +1264,7 @@ async fn test_instances_delete_fails_when_running_succeeds_when_stopped( let nexus = &apictx.nexus; let instance_name = "just-rainsticks"; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create an instance. 
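One note on the helper change above: `create_project_and_pool` now returns the whole `views::Project` rather than a bare `Uuid`, so callers that only need the id pull it off the view, as the metrics tests later in this file do:

    // Callers that only need the UUID take it from the returned view.
    let project = create_project_and_pool(&client).await;
    let project_id = project.identity.id;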
let instance_url = get_instance_url(instance_name); @@ -1356,7 +1360,7 @@ async fn test_instance_using_image_from_other_project_fails( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create an image in springfield-squidport. let images_url = format!("/v1/images?project={}", PROJECT_NAME); @@ -1437,7 +1441,7 @@ async fn test_instance_create_saga_removes_instance_database_record( let client = &cptestctx.external_client; // Create test IP pool, organization and project - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // The network interface parameters. let default_name = "default".parse::().unwrap(); @@ -1552,7 +1556,7 @@ async fn test_instance_with_single_explicit_ip_address( ) { let client = &cptestctx.external_client; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create the parameters for the interface. let default_name = "default".parse::().unwrap(); @@ -1626,7 +1630,7 @@ async fn test_instance_with_new_custom_network_interfaces( ) { let client = &cptestctx.external_client; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create a VPC Subnet other than the default. // // We'll create one interface in the default VPC Subnet and one in this new @@ -1776,7 +1780,7 @@ async fn test_instance_create_delete_network_interface( let nexus = &cptestctx.server.apictx().nexus; let instance_name = "nic-attach-test-inst"; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create the VPC Subnet for the secondary interface let secondary_subnet = params::VpcSubnetCreate { @@ -2016,7 +2020,7 @@ async fn test_instance_update_network_interfaces( let nexus = &cptestctx.server.apictx().nexus; let instance_name = "nic-update-test-inst"; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create the VPC Subnet for the secondary interface let secondary_subnet = params::VpcSubnetCreate { @@ -2480,7 +2484,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { // Test pre-reqs DiskTest::new(&cptestctx).await; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Create the "probablydata" disk create_disk(&client, PROJECT_NAME, "probablydata").await; @@ -2550,7 +2554,7 @@ async fn test_instance_create_attach_disks( // Test pre-reqs DiskTest::new(&cptestctx).await; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; let attachable_disk = create_disk(&client, PROJECT_NAME, "attachable-disk").await; @@ -2624,7 +2628,7 @@ async fn test_instance_create_attach_disks_undo( // Test pre-reqs DiskTest::new(&cptestctx).await; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; let regular_disk = create_disk(&client, PROJECT_NAME, "a-reg-disk").await; let faulted_disk = create_disk(&client, PROJECT_NAME, "faulted-disk").await; @@ -2717,7 +2721,7 @@ async fn test_attach_eight_disks_to_instance( // Test pre-reqs DiskTest::new(&cptestctx).await; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Make 8 disks for i in 0..8 { @@ -2870,7 +2874,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; // Test pre-reqs DiskTest::new(&cptestctx).await; - create_org_and_project(&client).await; + 
create_project_and_pool(&client).await; // Make 8 disks for i in 0..8 { @@ -2970,7 +2974,7 @@ async fn test_disks_detached_when_instance_destroyed( // Test pre-reqs DiskTest::new(&cptestctx).await; - create_org_and_project(&client).await; + create_project_and_pool(&client).await; // Make 8 disks for i in 0..8 { @@ -3136,7 +3140,7 @@ async fn test_instances_memory_rejected_less_than_min_memory_size( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Attempt to create the instance, observe a server error. let instance_name = "just-rainsticks"; @@ -3185,7 +3189,7 @@ async fn test_instances_memory_not_divisible_by_min_memory_size( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Attempt to create the instance, observe a server error. let instance_name = "just-rainsticks"; @@ -3233,7 +3237,7 @@ async fn test_instances_memory_greater_than_max_size( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Attempt to create the instance, observe a server error. let instance_name = "just-rainsticks"; @@ -3329,8 +3333,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_project(client, PROJECT_NAME).await; - populate_ip_pool(&client, "default", None).await; + create_project_and_pool(client).await; // The third item in each tuple specifies whether instance start should // succeed or fail if all these configs are visited in order and started in @@ -3396,8 +3399,7 @@ async fn test_cannot_provision_instance_beyond_cpu_limit( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_project(client, PROJECT_NAME).await; - populate_ip_pool(&client, "default", None).await; + create_project_and_pool(client).await; let too_many_cpus = InstanceCpuCount::try_from(i64::from(MAX_VCPU_PER_INSTANCE + 1)) @@ -3438,8 +3440,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; - create_project(client, PROJECT_NAME).await; - populate_ip_pool(&client, "default", None).await; + create_project_and_pool(client).await; let configs = vec![ ( @@ -3511,7 +3512,7 @@ async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - create_org_and_project(&client).await; + create_project_and_pool(&client).await; let instance_url = get_instance_url(instance_name); // Make sure we get a 404 if we try to access the serial console before creation. @@ -3628,89 +3629,244 @@ async fn test_instance_ephemeral_ip_from_correct_pool( // // The first is given to the "default" pool, the provided to a distinct // explicit pool. 
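`create_instance_with_pool` and `fetch_instance_ephemeral_ip`, used throughout the rewritten test below, are not shown in this patch. Judging from the explicit `InstanceCreate` this test falls back to later, the former presumably amounts to something like the following sketch:

    // Assumed shape of create_instance_with_pool(client, name, pool):
    // create a started instance whose ephemeral external IP comes from
    // the named pool, or from the silo default when `pool` is None.
    let instance_params = params::InstanceCreate {
        identity: IdentityMetadataCreateParams {
            name: "pool1-inst".parse().unwrap(),
            description: String::from("instance pool1-inst"),
        },
        ncpus: InstanceCpuCount(4),
        memory: ByteCount::from_gibibytes_u32(1),
        hostname: String::from("the_host"),
        user_data: vec![],
        network_interfaces: params::InstanceNetworkInterfaceAttachment::Default,
        external_ips: vec![params::ExternalIpCreate::Ephemeral {
            pool_name: Some("pool1".parse().unwrap()),
        }],
        disks: vec![],
        start: true,
    };
    let url = format!("/v1/instances?project={}", PROJECT_NAME);
    let _: Instance = object_create(client, &url, &instance_params).await;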
- let default_pool_range = IpRange::V4( + let range1 = IpRange::V4( Ipv4Range::new( std::net::Ipv4Addr::new(10, 0, 0, 1), std::net::Ipv4Addr::new(10, 0, 0, 5), ) .unwrap(), ); - let other_pool_range = IpRange::V4( + let range2 = IpRange::V4( Ipv4Range::new( std::net::Ipv4Addr::new(10, 1, 0, 1), std::net::Ipv4Addr::new(10, 1, 0, 5), ) .unwrap(), ); - populate_ip_pool(&client, "default", Some(default_pool_range)).await; - create_ip_pool(&client, "other-pool", Some(other_pool_range), None).await; + + // make first pool the default for the priv user's silo + create_ip_pool(&client, "pool1", Some(range1)).await; + link_ip_pool(&client, "pool1", &DEFAULT_SILO.id(), /*default*/ true).await; + + // second pool is associated with the silo but not default + create_ip_pool(&client, "pool2", Some(range2)).await; + link_ip_pool(&client, "pool2", &DEFAULT_SILO.id(), /*default*/ false).await; // Create an instance with pool name blank, expect IP from default pool - create_instance_with_pool(client, "default-pool-inst", None).await; + create_instance_with_pool(client, "pool1-inst", None).await; - let ip = fetch_instance_ephemeral_ip(client, "default-pool-inst").await; + let ip = fetch_instance_ephemeral_ip(client, "pool1-inst").await; assert!( - ip.ip >= default_pool_range.first_address() - && ip.ip <= default_pool_range.last_address(), - "Expected ephemeral IP to come from default pool" + ip.ip >= range1.first_address() && ip.ip <= range1.last_address(), + "Expected ephemeral IP to come from pool1" ); - // Create an instance explicitly using the "other-pool". - create_instance_with_pool(client, "other-pool-inst", Some("other-pool")) - .await; - let ip = fetch_instance_ephemeral_ip(client, "other-pool-inst").await; + // Create an instance explicitly using the non-default "other-pool". + create_instance_with_pool(client, "pool2-inst", Some("pool2")).await; + let ip = fetch_instance_ephemeral_ip(client, "pool2-inst").await; assert!( - ip.ip >= other_pool_range.first_address() - && ip.ip <= other_pool_range.last_address(), - "Expected ephemeral IP to come from other pool" + ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + "Expected ephemeral IP to come from pool2" ); - // now create a third pool, a silo default, to confirm it gets used. not - // using create_ip_pool because we need to specify a silo and default: true - let pool_name = "silo-pool"; - let _silo_pool: views::IpPool = object_create( + // make pool2 default and create instance with default pool. 
check that it now it comes from pool2 + let _: views::IpPoolSilo = object_put( client, - "/v1/system/ip-pools", - ¶ms::IpPoolCreate { - identity: IdentityMetadataCreateParams { - name: pool_name.parse().unwrap(), - description: String::from("an ip pool"), - }, - silo: Some(NameOrId::Id(DEFAULT_SILO.id())), - is_default: true, + &format!("/v1/system/ip-pools/pool2/silos/{}", DEFAULT_SILO.id()), + ¶ms::IpPoolSiloUpdate { is_default: true }, + ) + .await; + + create_instance_with_pool(client, "pool2-inst2", None).await; + let ip = fetch_instance_ephemeral_ip(client, "pool2-inst2").await; + assert!( + ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + "Expected ephemeral IP to come from pool2" + ); + + // try to delete association with pool1, but it fails because there is an + // instance with an IP from the pool in this silo + let pool1_silo_url = + format!("/v1/system/ip-pools/pool1/silos/{}", DEFAULT_SILO.id()); + let error = + object_delete_error(client, &pool1_silo_url, StatusCode::BAD_REQUEST) + .await; + assert_eq!( + error.message, + "IP addresses from this pool are in use in the linked silo" + ); + + // stop and delete instances with IPs from pool1. perhaps surprisingly, that + // includes pool2-inst also because the SNAT IP comes from the default pool + // even when different pool is specified for the ephemeral IP + stop_instance(&cptestctx, "pool1-inst").await; + stop_instance(&cptestctx, "pool2-inst").await; + + object_delete(client, &pool1_silo_url).await; + + // create instance with pool1, expecting allocation to fail + let instance_name = "pool1-inst-fail"; + let url = format!("/v1/instances?project={}", PROJECT_NAME); + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: format!("instance {:?}", instance_name), }, + ncpus: InstanceCpuCount(4), + memory: ByteCount::from_gibibytes_u32(1), + hostname: String::from("the_host"), + user_data: vec![], + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![params::ExternalIpCreate::Ephemeral { + pool_name: Some("pool1".parse().unwrap()), + }], + disks: vec![], + start: true, + }; + let error = object_create_error( + client, + &url, + &instance_params, + StatusCode::NOT_FOUND, ) .await; - let silo_pool_range = IpRange::V4( + assert_eq!(error.message, "not found: ip-pool with name \"pool1\""); +} + +async fn stop_instance( + cptestctx: &ControlPlaneTestContext, + instance_name: &str, +) { + let client = &cptestctx.external_client; + let instance = + instance_post(&client, instance_name, InstanceOp::Stop).await; + let nexus = &cptestctx.server.apictx().nexus; + instance_simulate(nexus, &instance.identity.id).await; + let url = + format!("/v1/instances/{}?project={}", instance_name, PROJECT_NAME); + object_delete(client, &url).await; +} + +// IP pool that exists but is not associated with any silo (or with a silo other +// than the current user's) cannot be used to get IPs +#[nexus_test] +async fn test_instance_ephemeral_ip_from_orphan_pool( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + let _ = create_project(&client, PROJECT_NAME).await; + + // make first pool the default for the priv user's silo + create_ip_pool(&client, "default", None).await; + link_ip_pool(&client, "default", &DEFAULT_SILO.id(), true).await; + + let orphan_pool_range = IpRange::V4( Ipv4Range::new( - std::net::Ipv4Addr::new(10, 2, 0, 1), - std::net::Ipv4Addr::new(10, 2, 0, 
5), + std::net::Ipv4Addr::new(10, 1, 0, 1), + std::net::Ipv4Addr::new(10, 1, 0, 5), ) .unwrap(), ); - populate_ip_pool(client, pool_name, Some(silo_pool_range)).await; + create_ip_pool(&client, "orphan-pool", Some(orphan_pool_range)).await; - create_instance_with_pool(client, "silo-pool-inst", Some("silo-pool")) - .await; - let ip = fetch_instance_ephemeral_ip(client, "silo-pool-inst").await; - assert!( - ip.ip >= silo_pool_range.first_address() - && ip.ip <= silo_pool_range.last_address(), - "Expected ephemeral IP to come from the silo default pool" + let instance_name = "orphan-pool-inst"; + let body = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: format!("instance {:?}", instance_name), + }, + ncpus: InstanceCpuCount(4), + memory: ByteCount::from_gibibytes_u32(1), + hostname: String::from("the_host"), + user_data: vec![], + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![params::ExternalIpCreate::Ephemeral { + pool_name: Some("orphan-pool".parse().unwrap()), + }], + disks: vec![], + start: true, + }; + + // instance create 404s + let url = format!("/v1/instances?project={}", PROJECT_NAME); + let error = + object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await; + + assert_eq!(error.error_code.unwrap(), "ObjectNotFound".to_string()); + assert_eq!( + error.message, + "not found: ip-pool with name \"orphan-pool\"".to_string() ); - // we can still specify other pool even though we now have a silo default - create_instance_with_pool(client, "other-pool-inst-2", Some("other-pool")) - .await; + // associate the pool with a different silo and we should get the same + // error on instance create + let params = params::IpPoolSiloLink { + silo: NameOrId::Name(cptestctx.silo_name.clone()), + is_default: false, + }; + let _: views::IpPoolSilo = + object_create(client, "/v1/system/ip-pools/orphan-pool/silos", ¶ms) + .await; - let ip = fetch_instance_ephemeral_ip(client, "other-pool-inst-2").await; - assert!( - ip.ip >= other_pool_range.first_address() - && ip.ip <= other_pool_range.last_address(), - "Expected ephemeral IP to come from the other pool" + let error = + object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await; + + assert_eq!(error.error_code.unwrap(), "ObjectNotFound".to_string()); + assert_eq!( + error.message, + "not found: ip-pool with name \"orphan-pool\"".to_string() ); } +// Test the error when creating an instance with an IP from the default pool, +// but there is no default pool +#[nexus_test] +async fn test_instance_ephemeral_ip_no_default_pool_error( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + let _ = create_project(&client, PROJECT_NAME).await; + + // important: no pool create, so there is no pool + + let body = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: "no-default-pool".parse().unwrap(), + description: "".to_string(), + }, + ncpus: InstanceCpuCount(4), + memory: ByteCount::from_gibibytes_u32(1), + hostname: String::from("the_host"), + user_data: vec![], + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![params::ExternalIpCreate::Ephemeral { + pool_name: None, // <--- the only important thing here + }], + disks: vec![], + start: true, + }; + + let url = format!("/v1/instances?project={}", PROJECT_NAME); + let error = + object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await; + let msg = "not 
found: ip-pool with id \"Default pool for current silo\"" + .to_string(); + assert_eq!(error.message, msg); + + // same deal if you specify a pool that doesn't exist + let body = params::InstanceCreate { + external_ips: vec![params::ExternalIpCreate::Ephemeral { + pool_name: Some("nonexistent-pool".parse().unwrap()), + }], + ..body + }; + let error = + object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await; + assert_eq!(error.message, msg); +} + #[nexus_test] async fn test_instance_attach_several_external_ips( cptestctx: &ControlPlaneTestContext, @@ -3727,7 +3883,12 @@ async fn test_instance_attach_several_external_ips( ) .unwrap(), ); - populate_ip_pool(&client, "default", Some(default_pool_range)).await; + create_ip_pool(&client, "default", Some(default_pool_range)).await; + link_ip_pool(&client, "default", &DEFAULT_SILO.id(), true).await; + + // this doesn't work as a replacement for the above. figure out why and + // probably delete it + // create_default_ip_pool(&client).await; // Create several floating IPs for the instance, totalling 8 IPs. let mut external_ip_create = @@ -3797,47 +3958,35 @@ async fn test_instance_allow_only_one_ephemeral_ip( let _ = create_project(&client, PROJECT_NAME).await; - // Create one IP pool with space for two ephemerals. - let default_pool_range = IpRange::V4( - Ipv4Range::new( - std::net::Ipv4Addr::new(10, 0, 0, 1), - std::net::Ipv4Addr::new(10, 0, 0, 2), - ) - .unwrap(), - ); - populate_ip_pool(&client, "default", Some(default_pool_range)).await; + // don't need any IP pools because request fails at parse time let ephemeral_create = params::ExternalIpCreate::Ephemeral { pool_name: Some("default".parse().unwrap()), }; - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &get_instances_url()) - .body(Some(¶ms::InstanceCreate { - identity: IdentityMetadataCreateParams { - name: "default-pool-inst".parse().unwrap(), - description: "instance default-pool-inst".into(), - }, - ncpus: InstanceCpuCount(4), - memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), - user_data: - b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" - .to_vec(), - network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, - external_ips: vec![ - ephemeral_create.clone(), ephemeral_create - ], - disks: vec![], - start: true, - })) - .expect_status(Some(StatusCode::BAD_REQUEST)), + let create_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: "default-pool-inst".parse().unwrap(), + description: "instance default-pool-inst".into(), + }, + ncpus: InstanceCpuCount(4), + memory: ByteCount::from_gibibytes_u32(1), + hostname: String::from("the_host"), + user_data: + b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" + .to_vec(), + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![ephemeral_create.clone(), ephemeral_create], + disks: vec![], + start: true, + }; + let error = object_create_error( + client, + &get_instances_url(), + &create_params, + StatusCode::BAD_REQUEST, ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + .await; + assert_eq!( error.message, "An instance may not have more than 1 ephemeral IP address" @@ -3915,8 +4064,9 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { ) .await; - // Populate IP Pool - populate_ip_pool(&client, "default", None).await; + // can't use create_default_ip_pool because 
we need to link to the silo we just made + create_ip_pool(&client, "default", None).await; + link_ip_pool(&client, "default", &silo.identity.id, true).await; // Create test projects NexusRequest::objects_post( @@ -4022,8 +4172,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; - create_project(client, PROJECT_NAME).await; + create_project_and_pool(client).await; // Add a few more sleds let nsleds = 3; diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 6a633fc5e1..5682df2c3a 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -6,33 +6,47 @@ use dropshot::test_util::ClientTestContext; use dropshot::HttpErrorResponseBody; +use dropshot::ResultsPage; use http::method::Method; use http::StatusCode; +use nexus_db_queries::db::datastore::SERVICE_IP_POOL_NAME; +use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_instance; +use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_project; +use nexus_test_utils::resource_helpers::link_ip_pool; +use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::object_create_error; +use nexus_test_utils::resource_helpers::object_delete; +use nexus_test_utils::resource_helpers::object_delete_error; +use nexus_test_utils::resource_helpers::object_get; +use nexus_test_utils::resource_helpers::object_get_error; +use nexus_test_utils::resource_helpers::object_put; +use nexus_test_utils::resource_helpers::object_put_error; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::resource_helpers::{ - create_instance, create_instance_with, -}; use nexus_test_utils_macros::nexus_test; -use nexus_types::external_api::params::ExternalIpCreate; -use nexus_types::external_api::params::InstanceDiskAttachment; -use nexus_types::external_api::params::InstanceNetworkInterfaceAttachment; +use nexus_types::external_api::params; use nexus_types::external_api::params::IpPoolCreate; +use nexus_types::external_api::params::IpPoolSiloLink; +use nexus_types::external_api::params::IpPoolSiloUpdate; use nexus_types::external_api::params::IpPoolUpdate; use nexus_types::external_api::shared::IpRange; use nexus_types::external_api::shared::Ipv4Range; use nexus_types::external_api::shared::Ipv6Range; use nexus_types::external_api::views::IpPool; use nexus_types::external_api::views::IpPoolRange; +use nexus_types::external_api::views::IpPoolSilo; +use nexus_types::external_api::views::Silo; +use nexus_types::identity::Resource; use omicron_common::api::external::IdentityMetadataUpdateParams; use omicron_common::api::external::NameOrId; use omicron_common::api::external::{IdentityMetadataCreateParams, Name}; use omicron_nexus::TestInterfaces; use sled_agent_client::TestInterfaces as SledTestInterfaces; -use std::collections::HashSet; +use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -58,41 +72,18 @@ async fn test_ip_pool_basic_crud(cptestctx: &ControlPlaneTestContext) { .await .expect("Failed to list IP Pools") .all_items; - 
assert_eq!(ip_pools.len(), 1, "Expected to see default IP pool"); - - assert_eq!(ip_pools[0].identity.name, "default",); - assert_eq!(ip_pools[0].silo_id, None); - assert!(ip_pools[0].is_default); + assert_eq!(ip_pools.len(), 0, "Expected empty list of IP pools"); // Verify 404 if the pool doesn't exist yet, both for creating or deleting - let error: HttpErrorResponseBody = NexusRequest::expect_failure( - client, - StatusCode::NOT_FOUND, - Method::GET, - &ip_pool_url, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + let error = + object_get_error(client, &ip_pool_url, StatusCode::NOT_FOUND).await; assert_eq!( error.message, format!("not found: ip-pool with name \"{}\"", pool_name), ); - let error: HttpErrorResponseBody = NexusRequest::expect_failure( - client, - StatusCode::NOT_FOUND, - Method::DELETE, - &ip_pool_url, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + + let error = + object_delete_error(client, &ip_pool_url, StatusCode::NOT_FOUND).await; assert_eq!( error.message, format!("not found: ip-pool with name \"{}\"", pool_name), ); @@ -105,20 +96,11 @@ async fn test_ip_pool_basic_crud(cptestctx: &ControlPlaneTestContext) { name: String::from(pool_name).parse().unwrap(), description: String::from(description), }, - silo: None, - is_default: false, }; let created_pool: IpPool = - NexusRequest::objects_post(client, ip_pools_url, &params) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + object_create(client, ip_pools_url, &params).await; assert_eq!(created_pool.identity.name, pool_name); assert_eq!(created_pool.identity.description, description); - assert_eq!(created_pool.silo_id, None); let list = NexusRequest::iter_collection_authn::<IpPool>( client, @@ -129,30 +111,21 @@ .await .expect("Failed to list IP Pools") .all_items; - assert_eq!(list.len(), 2, "Expected exactly two IP pools"); - assert_pools_eq(&created_pool, &list[1]); + assert_eq!(list.len(), 1, "Expected exactly 1 IP pool"); + assert_pools_eq(&created_pool, &list[0]); - let fetched_pool: IpPool = NexusRequest::object_get(client, &ip_pool_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + let fetched_pool: IpPool = object_get(client, &ip_pool_url).await; assert_pools_eq(&created_pool, &fetched_pool); // Verify we get a conflict error if we insert it again - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new(client, Method::POST, ip_pools_url) - .body(Some(&params)) - .expect_status(Some(StatusCode::BAD_REQUEST)), + let error = object_create_error( + client, + ip_pools_url, + &params, + StatusCode::BAD_REQUEST, ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + .await; + assert_eq!( error.message, format!("already exists: ip-pool \"{}\"", pool_name) @@ -167,27 +140,13 @@ .unwrap(), ); let created_range: IpPoolRange = - NexusRequest::objects_post(client, &ip_pool_add_range_url, &range) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + object_create(client, &ip_pool_add_range_url, &range).await; assert_eq!(range.first_address(), created_range.range.first_address()); assert_eq!(range.last_address(), created_range.range.last_address());
- let error: HttpErrorResponseBody = NexusRequest::expect_failure( - client, - StatusCode::BAD_REQUEST, - Method::DELETE, - &ip_pool_url, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + + let error: HttpErrorResponseBody = + object_delete_error(client, &ip_pool_url, StatusCode::BAD_REQUEST) + .await; assert_eq!( error.message, "IP Pool cannot be deleted while it contains IP ranges", ); @@ -208,13 +167,7 @@ }, }; let modified_pool: IpPool = - NexusRequest::object_put(client, &ip_pool_url, Some(&updates)) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + object_put(client, &ip_pool_url, &updates).await; assert_eq!(modified_pool.identity.name, new_pool_name); assert_eq!(modified_pool.identity.id, created_pool.identity.id); assert_eq!( @@ -231,27 +184,11 @@ ); let fetched_modified_pool: IpPool = - NexusRequest::object_get(client, &new_ip_pool_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + object_get(client, &new_ip_pool_url).await; assert_pools_eq(&modified_pool, &fetched_modified_pool); - let error: HttpErrorResponseBody = NexusRequest::expect_failure( - client, - StatusCode::NOT_FOUND, - Method::GET, - &ip_pool_url, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + let error: HttpErrorResponseBody = + object_get_error(client, &ip_pool_url, StatusCode::NOT_FOUND).await; assert_eq!( error.message, format!("not found: ip-pool with name \"{}\"", pool_name), ); @@ -275,69 +212,346 @@ .expect("Expected to be able to delete an empty IP Pool"); } +/// The internal IP pool, defined by its association with the internal silo, +/// cannot be interacted with through the operator API. Create, update, and +/// delete should all 404; only fetch by name or ID works. #[nexus_test] -async fn test_ip_pool_with_silo(cptestctx: &ControlPlaneTestContext) { +async fn test_ip_pool_service_no_cud(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - // can create a pool with an existing silo by name - let params = IpPoolCreate { - identity: IdentityMetadataCreateParams { - name: String::from("p0").parse().unwrap(), - description: String::from(""), + let internal_pool_name_url = + format!("/v1/system/ip-pools/{}", SERVICE_IP_POOL_NAME); + + // we can fetch the service pool by name or ID + let pool = NexusRequest::object_get(client, &internal_pool_name_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::<IpPool>() + .await; + + let internal_pool_id_url = + format!("/v1/system/ip-pools/{}", pool.identity.id); + let pool = NexusRequest::object_get(client, &internal_pool_id_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::<IpPool>() + .await;
+ + // but it does not come back in the list. there are none in the list + let pools = + objects_list_page_authz::<IpPool>(client, "/v1/system/ip-pools").await; + assert_eq!(pools.items.len(), 0); + + // deletes fail + + let error = object_delete_error( + client, + &internal_pool_name_url, + StatusCode::NOT_FOUND, + ) + .await; + assert_eq!( + error.message, + "not found: ip-pool with name \"oxide-service-pool\"" + ); + + let not_found_id = + format!("not found: ip-pool with id \"{}\"", pool.identity.id); + let error = object_delete_error( + client, + &internal_pool_id_url, + StatusCode::NOT_FOUND, + ) + .await; + assert_eq!(error.message, not_found_id); + + // Update not allowed + let put_body = params::IpPoolUpdate { + identity: IdentityMetadataUpdateParams { + name: Some("test".parse().unwrap()), + description: Some("test".to_string()), }, - silo: Some(NameOrId::Name(cptestctx.silo_name.clone())), - is_default: false, }; - let created_pool = create_pool(client, &params).await; - assert_eq!(created_pool.identity.name, "p0"); + let error = object_put_error( + client, + &internal_pool_id_url, + &put_body, + StatusCode::NOT_FOUND, + ) + .await; + assert_eq!(error.message, not_found_id); + + // linking not allowed + + // let link_body = params::IpPoolSiloLink { + // silo: NameOrId::Name(cptestctx.silo_name.clone()), + // is_default: false, + // }; + // let link_url = format!("{}/silos", internal_pool_id_url); + // let error = object_create_error( + // client, + // &link_url, + // &link_body, + // StatusCode::NOT_FOUND, + // ) + // .await; + // assert_eq!(error.message, not_found_id); + + // TODO: link, unlink, add/remove range by name or ID should all fail +} - let silo_id = - created_pool.silo_id.expect("Expected pool to have a silo_id"); #[nexus_test] +async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; - // now we'll create another IP pool using that silo ID - let params = IpPoolCreate { - identity: IdentityMetadataCreateParams { - name: String::from("p1").parse().unwrap(), - description: String::from(""), - }, - silo: Some(NameOrId::Id(silo_id)), + let p0 = create_pool(client, "p0").await; + let p1 = create_pool(client, "p1").await; + + // there should be no associations + let assocs_p0 = silos_for_pool(client, "p0").await; + assert_eq!(assocs_p0.items.len(), 0); + + // expect 404 on association if the specified silo doesn't exist + let nonexistent_silo_id = Uuid::new_v4(); + let params = params::IpPoolSiloLink { + silo: NameOrId::Id(nonexistent_silo_id), is_default: false, }; - let created_pool = create_pool(client, &params).await; - assert_eq!(created_pool.identity.name, "p1"); - assert_eq!(created_pool.silo_id.unwrap(), silo_id); - // expect 404 if the specified silo doesn't exist - let bad_silo_params = IpPoolCreate { - identity: IdentityMetadataCreateParams { - name: String::from("p2").parse().unwrap(), - description: String::from(""), - }, - silo: Some(NameOrId::Name( - String::from("not-a-thing").parse().unwrap(), - )), - is_default: false, + let error = object_create_error( + client, + "/v1/system/ip-pools/p0/silos", + &params, + StatusCode::NOT_FOUND, + ) + .await; + + assert_eq!( + error.message, + format!("not found: silo with id \"{nonexistent_silo_id}\"") + ); + + // associate by name with silo that exists + let silo = NameOrId::Name(cptestctx.silo_name.clone()); + let params = + params::IpPoolSiloLink { silo: silo.clone(), is_default: false }; + let _: IpPoolSilo = + object_create(client, "/v1/system/ip-pools/p0/silos", &params).await;
+ + // second attempt to create the same link errors due to conflict + let error = object_create_error( + client, + "/v1/system/ip-pools/p0/silos", + &params, + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!(error.error_code.unwrap(), "ObjectAlreadyExists"); + + // get silo ID so we can test association by ID as well + let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name); + let silo_id = object_get::<Silo>(client, &silo_url).await.identity.id; + + let assocs_p0 = silos_for_pool(client, "p0").await; + let silo_link = + IpPoolSilo { ip_pool_id: p0.identity.id, silo_id, is_default: false }; + assert_eq!(assocs_p0.items.len(), 1); + assert_eq!(assocs_p0.items[0], silo_link); + + // associate same silo to other pool by ID instead of name + let link_params = params::IpPoolSiloLink { + silo: NameOrId::Id(silo_id), + is_default: true, }; - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new(client, Method::POST, "/v1/system/ip-pools") - .body(Some(&bad_silo_params)) - .expect_status(Some(StatusCode::NOT_FOUND)), + let url = "/v1/system/ip-pools/p1/silos"; + let _: IpPoolSilo = object_create(client, &url, &link_params).await; + + let silos_p1 = silos_for_pool(client, "p1").await; + assert_eq!(silos_p1.items.len(), 1); + assert_eq!( + silos_p1.items[0], + IpPoolSilo { ip_pool_id: p1.identity.id, is_default: true, silo_id } + ); + + // creating a third pool and trying to link it as default: true should fail + create_pool(client, "p2").await; + let url = "/v1/system/ip-pools/p2/silos"; + let error = object_create_error( + client, + &url, + &link_params, + StatusCode::BAD_REQUEST, ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + .await; + assert_eq!(error.error_code.unwrap(), "ObjectAlreadyExists"); + + // pool delete fails because it is linked to a silo + let error = object_delete_error( + client, + "/v1/system/ip-pools/p1", + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + error.message, + "IP Pool cannot be deleted while it is linked to a silo", + ); + + // unlink silo (doesn't matter that it's a default) + let url = format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name); + object_delete(client, &url).await; + + let silos_p1 = silos_for_pool(client, "p1").await; + assert_eq!(silos_p1.items.len(), 0); + + // now we can delete the pool too + object_delete(client, "/v1/system/ip-pools/p1").await; +} + +#[nexus_test] +async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + create_pool(client, "p0").await; + create_pool(client, "p1").await; + + // there should be no linked silos + let silos_p0 = silos_for_pool(client, "p0").await; + assert_eq!(silos_p0.items.len(), 0); + + let silos_p1 = silos_for_pool(client, "p1").await; + assert_eq!(silos_p1.items.len(), 0); + + // put 404s if link doesn't exist yet + let params = IpPoolSiloUpdate { is_default: true }; + let p0_silo_url = + format!("/v1/system/ip-pools/p0/silos/{}", cptestctx.silo_name); + let error = + object_put_error(client, &p0_silo_url, &params, StatusCode::NOT_FOUND) + .await; + assert_eq!( + error.message, + "not found: ip-pool-resource with id \"(pool, silo)\"" + ); - assert_eq!(error.message, "not found: silo with name \"not-a-thing\""); + // associate both pools with the test silo + let silo = NameOrId::Name(cptestctx.silo_name.clone()); + let params = + params::IpPoolSiloLink { silo: silo.clone(), is_default: false }; + let _: IpPoolSilo = + object_create(client, "/v1/system/ip-pools/p0/silos", &params).await;
+ let _: IpPoolSilo = + object_create(client, "/v1/system/ip-pools/p1/silos", &params).await; + + // now both are linked to the silo, neither is marked default + let silos_p0 = silos_for_pool(client, "p0").await; + assert_eq!(silos_p0.items.len(), 1); + assert_eq!(silos_p0.items[0].is_default, false); + + let silos_p1 = silos_for_pool(client, "p1").await; + assert_eq!(silos_p1.items.len(), 1); + assert_eq!(silos_p1.items[0].is_default, false); + + // make p0 default + let params = IpPoolSiloUpdate { is_default: true }; + let _: IpPoolSilo = object_put(client, &p0_silo_url, &params).await; + + // making the same one default again is not an error + let _: IpPoolSilo = object_put(client, &p0_silo_url, &params).await; + + // now p0 is default + let silos_p0 = silos_for_pool(client, "p0").await; + assert_eq!(silos_p0.items.len(), 1); + assert_eq!(silos_p0.items[0].is_default, true); + + // p1 still not default + let silos_p1 = silos_for_pool(client, "p1").await; + assert_eq!(silos_p1.items.len(), 1); + assert_eq!(silos_p1.items[0].is_default, false); + + // making p1 the default pool for the silo unsets it on p0 + + // set p1 default + let params = IpPoolSiloUpdate { is_default: true }; + let p1_silo_url = + format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name); + let _: IpPoolSilo = object_put(client, &p1_silo_url, &params).await; + + // p1 is now default + let silos_p1 = silos_for_pool(client, "p1").await; + assert_eq!(silos_p1.items.len(), 1); + assert_eq!(silos_p1.items[0].is_default, true); + + // p0 is no longer default + let silos_p0 = silos_for_pool(client, "p0").await; + assert_eq!(silos_p0.items.len(), 1); + assert_eq!(silos_p0.items[0].is_default, false); + + // we can also unset default + let params = IpPoolSiloUpdate { is_default: false }; + let _: IpPoolSilo = object_put(client, &p1_silo_url, &params).await; + + let silos_p1 = silos_for_pool(client, "p1").await; + assert_eq!(silos_p1.items.len(), 1); + assert_eq!(silos_p1.items[0].is_default, false); +} + +// IP pool list fetch logic includes a join to ip_pool_resource, which is +// unusual, so we want to make sure pagination logic still works +#[nexus_test] +async fn test_ip_pool_pagination(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let base_url = "/v1/system/ip-pools"; + let first_page = objects_list_page_authz::<IpPool>(client, &base_url).await; + + // we start out with no pools + assert_eq!(first_page.items.len(), 0); + + let mut pool_names = vec![]; + + // create more pools to work with, adding their names to the list so we + // can use it to check order + for i in 1..=8 { + let name = format!("other-pool-{}", i); + pool_names.push(name.clone()); + create_pool(client, &name).await; + } + + let first_five_url = format!("{}?limit=5", base_url); + let first_five = + objects_list_page_authz::<IpPool>(client, &first_five_url).await; + assert!(first_five.next_page.is_some()); + assert_eq!(get_names(first_five.items), &pool_names[0..5]); + + let next_page_url = format!( + "{}?limit=5&page_token={}", + base_url, + first_five.next_page.unwrap() + ); + let next_page = + objects_list_page_authz::<IpPool>(client, &next_page_url).await; + assert_eq!(get_names(next_page.items), &pool_names[5..8]); +} + +/// helper to make tests less ugly +fn get_names(pools: Vec<IpPool>) -> Vec<String> { + pools.iter().map(|p| p.identity.name.to_string()).collect() +}
+ +async fn silos_for_pool( client: &ClientTestContext, id: &str, ) -> ResultsPage<IpPoolSilo> { + let url = format!("/v1/system/ip-pools/{}/silos", id); + objects_list_page_authz::<IpPoolSilo>(client, &url).await +} + +async fn create_pool(client: &ClientTestContext, name: &str) -> IpPool { + let params = IpPoolCreate { + identity: IdentityMetadataCreateParams { + name: Name::try_from(name.to_string()).unwrap(), + description: "".to_string(), + }, + }; + NexusRequest::objects_post(client, "/v1/system/ip-pools", &params) .authn_as(AuthnMode::PrivilegedUser) .execute() .await @@ -374,8 +588,8 @@ async fn test_ip_pool_range_overlapping_ranges_fails( name: String::from(pool_name).parse().unwrap(), description: String::from(description), }, - silo: None, - is_default: false, + // silo: None, + // is_default: false, }; let created_pool: IpPool = NexusRequest::objects_post(client, ip_pools_url, &params) @@ -557,8 +771,6 @@ async fn test_ip_pool_range_pagination(cptestctx: &ControlPlaneTestContext) { name: String::from(pool_name).parse().unwrap(), description: String::from(description), }, - silo: None, - is_default: false, }; let created_pool: IpPool = NexusRequest::objects_post(client, ip_pools_url, &params) @@ -640,71 +852,15 @@ } #[nexus_test] -async fn test_ip_pool_list_usable_by_project( - cptestctx: &ControlPlaneTestContext, -) { +async fn test_ip_pool_list_in_silo(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - let scoped_ip_pools_url = "/v1/ip-pools"; - let ip_pools_url = "/v1/system/ip-pools"; let mypool_name = "mypool"; - let default_ip_pool_add_range_url = - format!("{}/default/ranges/add", ip_pools_url); - let mypool_ip_pool_add_range_url = - format!("{}/{}/ranges/add", ip_pools_url, mypool_name); - let service_ip_pool_add_range_url = - "/v1/system/ip-pools-service/ranges/add".to_string(); - - // Add an IP range to the default pool - let default_range = IpRange::V4( - Ipv4Range::new( - std::net::Ipv4Addr::new(10, 0, 0, 1), - std::net::Ipv4Addr::new(10, 0, 0, 2), - ) - .unwrap(), - ); - let created_range: IpPoolRange = NexusRequest::objects_post( - client, - &default_ip_pool_add_range_url, - &default_range, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!( - default_range.first_address(), - created_range.range.first_address() - ); - assert_eq!( - default_range.last_address(), - created_range.range.last_address() - ); - - // Create an org and project, and then try to make an instance with an IP from - // each range to which the project is expected to have access.
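For reference, the setup pattern being substituted throughout this patch: the old one-call `populate_ip_pool(client, "default", range)` becomes an explicit create-then-link sequence. A minimal sketch of the new flow, assuming the `create_ip_pool` and `link_ip_pool` helpers introduced by this patch and the `DEFAULT_SILO` fixture, with names copied from the surrounding hunks rather than from a definitive helper API:

// Sketch only: create a pool with a range, then link it to a silo.
// Creating a pool no longer makes it usable by anyone on its own;
// `link_ip_pool` associates it with a silo, and `true` marks it as that
// silo's default, so allocations that name no pool will draw from it.
let range = IpRange::V4(
    Ipv4Range::new(
        std::net::Ipv4Addr::new(10, 0, 0, 1),
        std::net::Ipv4Addr::new(10, 0, 0, 2),
    )
    .unwrap(),
);
create_ip_pool(&client, "mypool", Some(range)).await;
link_ip_pool(&client, "mypool", &DEFAULT_SILO.id(), true).await;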
const PROJECT_NAME: &str = "myproj"; - const INSTANCE_NAME: &str = "myinst"; create_project(client, PROJECT_NAME).await; - // TODO: give this project explicit access when such functionality exists - let params = IpPoolCreate { - identity: IdentityMetadataCreateParams { - name: String::from(mypool_name).parse().unwrap(), - description: String::from("right on cue"), - }, - silo: None, - is_default: false, - }; - NexusRequest::objects_post(client, ip_pools_url, ¶ms) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - // Add an IP range to mypool + // create pool with range and link (as default pool) to default silo, which + // is the privileged user's silo let mypool_range = IpRange::V4( Ipv4Range::new( std::net::Ipv4Addr::new(10, 0, 0, 51), @@ -712,102 +868,35 @@ async fn test_ip_pool_list_usable_by_project( ) .unwrap(), ); - let created_range: IpPoolRange = NexusRequest::objects_post( - client, - &mypool_ip_pool_add_range_url, - &mypool_range, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!( - mypool_range.first_address(), - created_range.range.first_address() - ); - assert_eq!(mypool_range.last_address(), created_range.range.last_address()); + create_ip_pool(client, mypool_name, Some(mypool_range)).await; + link_ip_pool(client, mypool_name, &DEFAULT_SILO.id(), true).await; - // add a service range we *don't* expect to see in the results - let service_range = IpRange::V4( + // create another pool and don't link it to anything + let otherpool_name = "other-pool"; + let otherpool_range = IpRange::V4( Ipv4Range::new( - std::net::Ipv4Addr::new(10, 0, 0, 101), - std::net::Ipv4Addr::new(10, 0, 0, 102), + std::net::Ipv4Addr::new(10, 0, 0, 53), + std::net::Ipv4Addr::new(10, 0, 0, 54), ) .unwrap(), ); + create_ip_pool(client, otherpool_name, Some(otherpool_range)).await; - let created_range: IpPoolRange = NexusRequest::objects_post( - client, - &service_ip_pool_add_range_url, - &service_range, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!( - service_range.first_address(), - created_range.range.first_address() - ); - assert_eq!( - service_range.last_address(), - created_range.range.last_address() - ); - - // TODO: add non-service, ip pools that the project *can't* use, when that - // functionality is implemented in the future (i.e. 
a "notmypool") + let list = + objects_list_page_authz::(client, "/v1/ip-pools").await.items; - let list_url = format!("{}?project={}", scoped_ip_pools_url, PROJECT_NAME); - let list = NexusRequest::iter_collection_authn::( - client, &list_url, "", None, - ) - .await - .expect("Failed to list IP Pools") - .all_items; + // only mypool shows up because it's linked to my silo + assert_eq!(list.len(), 1); + assert_eq!(list[0].identity.name.to_string(), mypool_name); - // default and mypool - assert_eq!(list.len(), 2); - let pool_names: HashSet = - list.iter().map(|pool| pool.identity.name.to_string()).collect(); - let expected_names: HashSet = - ["default", "mypool"].into_iter().map(|s| s.to_string()).collect(); - assert_eq!(pool_names, expected_names); - - // ensure we can view each pool returned - for pool_name in &pool_names { - let view_pool_url = format!( - "{}/{}?project={}", - scoped_ip_pools_url, pool_name, PROJECT_NAME - ); - let pool: IpPool = NexusRequest::object_get(client, &view_pool_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!(pool.identity.name.as_str(), pool_name.as_str()); - } + // fetch the pool directly too + let url = format!("/v1/ip-pools/{}", mypool_name); + let pool: IpPool = object_get(client, &url).await; + assert_eq!(pool.identity.name.as_str(), mypool_name); - // ensure we can successfully create an instance with each of the pools we - // should be able to access - for pool_name in pool_names { - let instance_name = format!("{}-{}", INSTANCE_NAME, pool_name); - let pool_name = Some(Name::try_from(pool_name).unwrap()); - create_instance_with( - client, - PROJECT_NAME, - &instance_name, - &InstanceNetworkInterfaceAttachment::Default, - Vec::::new(), - vec![ExternalIpCreate::Ephemeral { pool_name }], - ) - .await; - } + // fetching the other pool directly 404s + let url = format!("/v1/ip-pools/{}", otherpool_name); + object_get_error(client, &url, StatusCode::NOT_FOUND).await; } #[nexus_test] @@ -818,13 +907,36 @@ async fn test_ip_range_delete_with_allocated_external_ip_fails( let apictx = &cptestctx.server.apictx(); let nexus = &apictx.nexus; let ip_pools_url = "/v1/system/ip-pools"; - let pool_name = "default"; + let pool_name = "mypool"; let ip_pool_url = format!("{}/{}", ip_pools_url, pool_name); + let ip_pool_silos_url = format!("{}/{}/silos", ip_pools_url, pool_name); let ip_pool_ranges_url = format!("{}/ranges", ip_pool_url); let ip_pool_add_range_url = format!("{}/add", ip_pool_ranges_url); let ip_pool_rem_range_url = format!("{}/remove", ip_pool_ranges_url); - // Add an IP range to the default pool + // create pool + let params = IpPoolCreate { + identity: IdentityMetadataCreateParams { + name: String::from(pool_name).parse().unwrap(), + description: String::from("right on cue"), + }, + }; + NexusRequest::objects_post(client, ip_pools_url, ¶ms) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::() + .await; + + // associate pool with default silo, which is the privileged user's silo + let params = IpPoolSiloLink { + silo: NameOrId::Id(DEFAULT_SILO.id()), + is_default: true, + }; + NexusRequest::objects_post(client, &ip_pool_silos_url, ¶ms) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::() + .await; + + // Add an IP range to the pool let range = IpRange::V4( Ipv4Range::new( std::net::Ipv4Addr::new(10, 0, 0, 1), diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 89dd2e3cc6..5f517d49e0 100644 --- 
a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -9,8 +9,8 @@ use http::{Method, StatusCode}; use nexus_db_queries::db::fixed_data::silo::SILO_ID; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::resource_helpers::{ - create_disk, create_instance, create_project, objects_list_page_authz, - populate_ip_pool, DiskTest, + create_default_ip_pool, create_disk, create_instance, create_project, + objects_list_page_authz, DiskTest, }; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; @@ -168,7 +168,7 @@ async fn test_metrics( let client = &cptestctx.external_client; cptestctx.server.register_as_producer().await; // needed for oximeter metrics to work - populate_ip_pool(&client, "default", None).await; // needed for instance create to work + create_default_ip_pool(&client).await; // needed for instance create to work DiskTest::new(cptestctx).await; // needed for disk create to work // silo metrics start out zero diff --git a/nexus/tests/integration_tests/pantry.rs b/nexus/tests/integration_tests/pantry.rs index dc4e8e6c95..1a3908affa 100644 --- a/nexus/tests/integration_tests/pantry.rs +++ b/nexus/tests/integration_tests/pantry.rs @@ -11,10 +11,10 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::identity_eq; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::object_create; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; @@ -54,8 +54,8 @@ fn get_disk_attach_url(instance_name: &str) -> String { ) } -async fn create_org_and_project(client: &ClientTestContext) -> Uuid { - populate_ip_pool(&client, "default", None).await; +async fn create_project_and_pool(client: &ClientTestContext) -> Uuid { + create_default_ip_pool(client).await; let project = create_project(client, PROJECT_NAME).await; project.identity.id } @@ -350,7 +350,7 @@ async fn validate_disk_state(client: &ClientTestContext, state: DiskState) { async fn test_disk_create_for_importing(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); let new_disk = params::DiskCreate { @@ -396,7 +396,7 @@ async fn test_cannot_mount_import_ready_disk( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -427,7 +427,7 @@ async fn test_cannot_mount_import_from_bulk_writes_disk( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -451,7 +451,7 @@ async fn test_import_blocks_with_bulk_write( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ 
-492,7 +492,7 @@ async fn test_import_blocks_with_bulk_write_with_snapshot( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -543,7 +543,7 @@ async fn test_cannot_finalize_without_stopping_bulk_writes( let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -572,7 +572,7 @@ async fn test_cannot_bulk_write_to_unaligned_offset( let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -605,7 +605,7 @@ async fn test_cannot_bulk_write_data_not_block_size_multiple( let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -637,7 +637,7 @@ async fn test_cannot_bulk_write_data_past_end_of_disk( let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -669,7 +669,7 @@ async fn test_cannot_bulk_write_data_non_base64( let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -707,7 +707,7 @@ async fn test_can_stop_start_import_from_bulk_write( let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -735,7 +735,7 @@ async fn test_cannot_bulk_write_start_attached_disk( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -765,7 +765,7 @@ async fn test_cannot_bulk_write_attached_disk( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -795,7 +795,7 @@ async fn test_cannot_bulk_write_stop_attached_disk( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; @@ -824,7 +824,7 @@ async fn test_cannot_finalize_attached_disk( let nexus = &cptestctx.server.apictx().nexus; DiskTest::new(&cptestctx).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; create_disk_with_state_importing_blocks(client).await; diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index 24b2721a1d..d9d6ceef5b 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -10,8 +10,8 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use 
nexus_test_utils::resource_helpers::{ - create_disk, create_project, create_vpc, object_create, populate_ip_pool, - project_get, projects_list, DiskTest, + create_default_ip_pool, create_disk, create_project, create_vpc, + object_create, project_get, projects_list, DiskTest, }; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; @@ -134,7 +134,7 @@ async fn test_project_deletion_with_instance( ) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; // Create a project that we'll use for testing. let name = "springfield-squidport"; diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index 0ad2419bee..858837bb32 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -6,16 +6,17 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::http_testing::TestResponse; +use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_local_user; use nexus_test_utils::resource_helpers::grant_iam; +use nexus_test_utils::resource_helpers::link_ip_pool; use nexus_test_utils::resource_helpers::object_create; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::SiloRole; -use nexus_types::external_api::views::SiloQuotas; +use nexus_types::external_api::views::{Silo, SiloQuotas}; use omicron_common::api::external::ByteCount; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::InstanceCpuCount; @@ -168,7 +169,7 @@ async fn setup_silo_with_quota( silo_name: &str, quotas: params::SiloQuotasCreate, ) -> ResourceAllocator { - let silo = object_create( + let silo: Silo = object_create( client, "/v1/system/silos", ¶ms::SiloCreate { @@ -186,7 +187,10 @@ async fn setup_silo_with_quota( ) .await; - populate_ip_pool(&client, "default", None).await; + // create default pool and link to this silo. 
can't use + // create_default_ip_pool because that links to the default silo + create_ip_pool(&client, "default", None).await; + link_ip_pool(&client, "default", &silo.identity.id, true).await; // Create a silo user let user = create_local_user( diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index a166280ead..5e399cbe84 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -7,12 +7,12 @@ use camino::Utf8Path; use dropshot::test_util::ClientTestContext; use nexus_test_interface::NexusServer; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_physical_disk; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::delete_physical_disk; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::start_sled_agent; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; @@ -144,7 +144,7 @@ async fn test_sled_instance_list(cptestctx: &ControlPlaneTestContext) { .is_empty()); // Create an IP pool and project that we'll use for testing. - populate_ip_pool(&external_client, "default", None).await; + create_default_ip_pool(&external_client).await; let project = create_project(&external_client, "test-project").await; let instance = create_instance(&external_client, "test-project", "test-instance") diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 24b04bf718..87ec2b3163 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -17,9 +17,9 @@ use nexus_db_queries::db::lookup::LookupPath; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::object_create; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; @@ -48,7 +48,8 @@ fn get_disk_url(name: &str) -> String { format!("/v1/disks/{}?project={}", name, PROJECT_NAME) } -async fn create_org_and_project(client: &ClientTestContext) -> Uuid { +async fn create_project_and_pool(client: &ClientTestContext) -> Uuid { + create_default_ip_pool(client).await; let project = create_project(client, PROJECT_NAME).await; project.identity.id } @@ -57,8 +58,7 @@ async fn create_org_and_project(client: &ClientTestContext) -> Uuid { async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); // Define a global image @@ -162,8 +162,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { async fn test_snapshot_without_instance(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let 
disks_url = get_disks_url(); // Define a global image @@ -262,8 +261,7 @@ async fn test_delete_snapshot(cptestctx: &ControlPlaneTestContext) { let nexus = &cptestctx.server.apictx().nexus; let datastore = nexus.datastore(); DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - let project_id = create_org_and_project(client).await; + let project_id = create_project_and_pool(client).await; let disks_url = get_disks_url(); // Create a blank disk @@ -422,7 +420,7 @@ async fn test_reject_creating_disk_from_snapshot( let nexus = &cptestctx.server.apictx().nexus; let datastore = nexus.datastore(); - let project_id = create_org_and_project(&client).await; + let project_id = create_project_and_pool(&client).await; let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); @@ -575,7 +573,7 @@ async fn test_reject_creating_disk_from_illegal_snapshot( let nexus = &cptestctx.server.apictx().nexus; let datastore = nexus.datastore(); - let project_id = create_org_and_project(&client).await; + let project_id = create_project_and_pool(&client).await; let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); @@ -671,7 +669,7 @@ async fn test_reject_creating_disk_from_other_project_snapshot( let nexus = &cptestctx.server.apictx().nexus; let datastore = nexus.datastore(); - let project_id = create_org_and_project(&client).await; + let project_id = create_project_and_pool(&client).await; let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); @@ -751,8 +749,7 @@ async fn test_cannot_snapshot_if_no_space(cptestctx: &ControlPlaneTestContext) { // Test that snapshots cannot be created if there is no space for the blocks let client = &cptestctx.external_client; DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); // Create a disk at just over half the capacity of what DiskTest allocates @@ -805,8 +802,7 @@ async fn test_cannot_snapshot_if_no_space(cptestctx: &ControlPlaneTestContext) { async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); // Define a global image @@ -905,7 +901,7 @@ async fn test_create_snapshot_record_idempotent( let nexus = &cptestctx.server.apictx().nexus; let datastore = nexus.datastore(); - let project_id = create_org_and_project(&client).await; + let project_id = create_project_and_pool(&client).await; let disk_id = Uuid::new_v4(); let snapshot = db::model::Snapshot { @@ -1093,8 +1089,7 @@ async fn test_multiple_deletes_not_sent(cptestctx: &ControlPlaneTestContext) { let nexus = &cptestctx.server.apictx().nexus; let datastore = nexus.datastore(); DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - let _project_id = create_org_and_project(client).await; + let _project_id = create_project_and_pool(client).await; let disks_url = get_disks_url(); // Create a blank disk diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 7f5c27384c..91a933754c 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ 
-13,10 +13,10 @@ use ipnetwork::Ipv4Network; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance_with; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use omicron_common::api::external::{ @@ -84,7 +84,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) { let project_name = "springfield-squidport"; // Create a project that we'll use for testing. - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; create_project(&client, project_name).await; let url_instances = format!("/v1/instances?project={}", project_name); diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index 317a5a0576..3671564866 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -69,9 +69,8 @@ async fn test_unauthorized(cptestctx: &ControlPlaneTestContext) { .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .unwrap_or_else(|_| { - panic!("Failed to GET from URL: {url}") - }), + .map_err(|e| panic!("Failed to GET from URL: {url}, {e}")) + .unwrap(), id_routes, ), SetupReq::Post { url, body, id_routes } => ( @@ -80,7 +79,8 @@ async fn test_unauthorized(cptestctx: &ControlPlaneTestContext) { .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .unwrap_or_else(|_| panic!("Failed to POST to URL: {url}")), + .map_err(|e| panic!("Failed to POST to URL: {url}, {e}")) + .unwrap(), id_routes, ), }; @@ -201,14 +201,24 @@ static SETUP_REQUESTS: Lazy> = Lazy::new(|| { &*DEMO_SILO_USER_ID_SET_PASSWORD_URL, ], }, - // Get the default IP pool - SetupReq::Get { url: &DEMO_IP_POOL_URL, id_routes: vec![] }, + // Create the default IP pool + SetupReq::Post { + url: &DEMO_IP_POOLS_URL, + body: serde_json::to_value(&*DEMO_IP_POOL_CREATE).unwrap(), + id_routes: vec!["/v1/ip-pools/{id}"], + }, // Create an IP pool range SetupReq::Post { url: &DEMO_IP_POOL_RANGES_ADD_URL, body: serde_json::to_value(&*DEMO_IP_POOL_RANGE).unwrap(), id_routes: vec![], }, + // Link default pool to default silo + SetupReq::Post { + url: &DEMO_IP_POOL_SILOS_URL, + body: serde_json::to_value(&*DEMO_IP_POOL_SILOS_BODY).unwrap(), + id_routes: vec![], + }, // Create a Project in the Organization SetupReq::Post { url: "/v1/projects", diff --git a/nexus/tests/integration_tests/utilization.rs b/nexus/tests/integration_tests/utilization.rs index 5ebf56f35a..e09e71a9e3 100644 --- a/nexus/tests/integration_tests/utilization.rs +++ b/nexus/tests/integration_tests/utilization.rs @@ -3,10 +3,10 @@ use http::StatusCode; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use 
nexus_types::external_api::params; @@ -27,7 +27,7 @@ type ControlPlaneTestContext = async fn test_utilization(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; + create_default_ip_pool(&client).await; let current_util = objects_list_page_authz::( client, diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 466cb5472e..34f037ee8c 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -12,9 +12,9 @@ use nexus_db_queries::db::DataStore; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::object_create; -use nexus_test_utils::resource_helpers::populate_ip_pool; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; @@ -51,14 +51,14 @@ fn get_snapshot_url(snapshot: &str) -> String { format!("/v1/snapshots/{}?project={}", snapshot, PROJECT_NAME) } -async fn create_org_and_project(client: &ClientTestContext) -> Uuid { +async fn create_project_and_pool(client: &ClientTestContext) -> Uuid { + create_default_ip_pool(client).await; let project = create_project(client, PROJECT_NAME).await; project.identity.id } async fn create_image(client: &ClientTestContext) -> views::Image { - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; // Define a global image @@ -411,8 +411,7 @@ async fn test_multiple_disks_multiple_snapshots_order_1( // Test multiple disks with multiple snapshots let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); // Create a blank disk @@ -547,8 +546,7 @@ async fn test_multiple_disks_multiple_snapshots_order_2( // Test multiple disks with multiple snapshots, varying the delete order let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); // Create a blank disk @@ -817,8 +815,7 @@ async fn test_multiple_layers_of_snapshots_delete_all_disks_first( // delete all disks, then delete all snapshots let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; prepare_for_test_multiple_layers_of_snapshots(&client).await; @@ -856,8 +853,7 @@ async fn test_multiple_layers_of_snapshots_delete_all_snapshots_first( // delete all snapshots, then delete all disks let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; prepare_for_test_multiple_layers_of_snapshots(&client).await; @@ -895,8 +891,7 @@ async fn test_multiple_layers_of_snapshots_random_delete_order( 
// delete snapshots and disks in a random order let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; prepare_for_test_multiple_layers_of_snapshots(&client).await; @@ -1116,8 +1111,7 @@ async fn delete_image_test( let disk_test = DiskTest::new(&cptestctx).await; let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disks_url = get_disks_url(); @@ -2345,8 +2339,7 @@ async fn test_disk_create_saga_unwinds_correctly( // created. let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disk_test = DiskTest::new(&cptestctx).await; let disks_url = get_disks_url(); @@ -2398,8 +2391,7 @@ async fn test_snapshot_create_saga_unwinds_correctly( // created. let client = &cptestctx.external_client; - populate_ip_pool(&client, "default", None).await; - create_org_and_project(client).await; + create_project_and_pool(client).await; let disk_test = DiskTest::new(&cptestctx).await; let disks_url = get_disks_url(); diff --git a/nexus/tests/integration_tests/vpc_subnets.rs b/nexus/tests/integration_tests/vpc_subnets.rs index 3067300e19..76cff9ac79 100644 --- a/nexus/tests/integration_tests/vpc_subnets.rs +++ b/nexus/tests/integration_tests/vpc_subnets.rs @@ -14,7 +14,7 @@ use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::identity_eq; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::{ - create_instance, create_project, create_vpc, populate_ip_pool, + create_default_ip_pool, create_instance, create_project, create_vpc, }; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::{params, views::VpcSubnet}; @@ -37,8 +37,8 @@ async fn test_delete_vpc_subnet_with_interfaces_fails( // Create a project that we'll use for testing. 
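A note on `create_default_ip_pool`, which replaces `populate_ip_pool` at most call sites in this patch: judging by its uses here, and by the quotas.rs comment that it links to the default silo, it is shorthand for the same create-then-link sequence. The helper body is not shown in this patch, so the following is only a plausible sketch:

// Hypothetical composition of the helper, inferred from call sites; the
// real definition lives in nexus-test-utils and may differ in details
// such as its return type.
pub async fn create_default_ip_pool(client: &ClientTestContext) {
    // No IP range here; tests that need addresses add ranges separately.
    create_ip_pool(client, "default", None).await;
    // Link it to DEFAULT_SILO (the privileged test user's silo) and make
    // it that silo's default pool.
    link_ip_pool(client, "default", &DEFAULT_SILO.id(), true).await;
}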
let project_name = "springfield-squidport"; let instance_name = "inst"; + create_default_ip_pool(client).await; let _ = create_project(&client, project_name).await; - populate_ip_pool(client, "default", None).await; let subnets_url = format!("/v1/vpc-subnets?project={}&vpc=default", project_name); diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index b607bbf1f3..2d842dd930 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -149,6 +149,10 @@ ip_pool_service_range_add POST /v1/system/ip-pools-service/ra ip_pool_service_range_list GET /v1/system/ip-pools-service/ranges ip_pool_service_range_remove POST /v1/system/ip-pools-service/ranges/remove ip_pool_service_view GET /v1/system/ip-pools-service +ip_pool_silo_link POST /v1/system/ip-pools/{pool}/silos +ip_pool_silo_list GET /v1/system/ip-pools/{pool}/silos +ip_pool_silo_unlink DELETE /v1/system/ip-pools/{pool}/silos/{silo} +ip_pool_silo_update PUT /v1/system/ip-pools/{pool}/silos/{silo} ip_pool_update PUT /v1/system/ip-pools/{pool} ip_pool_view GET /v1/system/ip-pools/{pool} networking_address_lot_block_list GET /v1/system/networking/address-lot/{address_lot}/blocks diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 209d1f607c..d3f269ef5d 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -836,17 +836,6 @@ impl std::fmt::Debug for CertificateCreate { pub struct IpPoolCreate { #[serde(flatten)] pub identity: IdentityMetadataCreateParams, - - /// If an IP pool is associated with a silo, instance IP allocations in that - /// silo can draw from that pool. - pub silo: Option<NameOrId>, - - /// Whether the IP pool is considered a default pool for its scope (fleet - /// or silo). If a pool is marked default and is associated with a silo, - /// instances created in that silo will draw IPs from that pool unless - /// another pool is specified at instance create time. - #[serde(default)] - pub is_default: bool, } /// Parameters for updating an IP Pool @@ -856,6 +845,31 @@ pub struct IpPoolUpdate { pub identity: IdentityMetadataUpdateParams, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct IpPoolSiloPath { + pub pool: NameOrId, + pub silo: NameOrId, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct IpPoolSiloLink { + pub silo: NameOrId, + /// When a pool is the default for a silo, floating IPs and instance + /// ephemeral IPs will come from that pool when no other pool is specified. + /// There can be at most one default for a given silo. + pub is_default: bool, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct IpPoolSiloUpdate { + /// When a pool is the default for a silo, floating IPs and instance + /// ephemeral IPs will come from that pool when no other pool is specified. + /// There can be at most one default for a given silo, so when a pool is + /// made default, an existing default will remain linked but will no longer + /// be the default. + pub is_default: bool, +} + // Floating IPs /// Parameters for creating a new floating IP address for instances.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 46a8aa3d95..c85597e94c 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -295,11 +295,23 @@ pub struct VpcRouter { // IP POOLS +/// A collection of IP ranges. If a pool is linked to a silo, IP addresses from +/// the pool can be allocated within that silo #[derive(ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct IpPool { #[serde(flatten)] pub identity: IdentityMetadata, - pub silo_id: Option<Uuid>, +} + +/// A link between an IP pool and a silo that allows one to allocate IPs from +/// the pool within the silo +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct IpPoolSilo { + pub ip_pool_id: Uuid, + pub silo_id: Uuid, + /// When a pool is the default for a silo, floating IPs and instance + /// ephemeral IPs will come from that pool when no other pool is specified. + /// There can be at most one default for a given silo. pub is_default: bool, } diff --git a/openapi/nexus.json b/openapi/nexus.json index f2433e5512..a4ba6cbb86 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2154,7 +2154,7 @@ "tags": [ "projects" ], - "summary": "List all IP pools that can be used by a given project", + "summary": "List all IP pools", "operationId": "project_ip_pool_list", "parameters": [ { @@ -2177,14 +2177,6 @@ "type": "string" } }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, { "in": "query", "name": "sort_by", @@ -2212,9 +2204,7 @@ } }, "x-dropshot-pagination": { - "required": [ - "project" - ] + "required": [] } } }, @@ -2234,14 +2224,6 @@ "schema": { "$ref": "#/components/schemas/NameOrId" } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } } ], "responses": { @@ -5006,6 +4988,213 @@ } } }, + "/v1/system/ip-pools/{pool}/silos": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "List an IP pool's linked silos", + "operationId": "ip_pool_silo_list", + "parameters": [ + { + "in": "path", + "name": "pool", + "description": "Name or ID of the IP pool", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IpPoolSiloResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + }, + "post": { + "tags": [ + "system/networking" + ], + "summary": "Make an IP pool available within a silo", + "operationId": "ip_pool_silo_link", + "parameters": [ + { + "in": "path", + "name": "pool", + "description": "Name
or ID of the IP pool", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IpPoolSiloLink" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IpPoolSilo" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/system/ip-pools/{pool}/silos/{silo}": { + "put": { + "tags": [ + "system/networking" + ], + "summary": "Make an IP pool default or not-default for a silo", + "description": "When a pool is made default for a silo, any existing default will remain linked to the silo, but will no longer be the default.", + "operationId": "ip_pool_silo_update", + "parameters": [ + { + "in": "path", + "name": "pool", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IpPoolSiloUpdate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IpPoolSilo" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "system/networking" + ], + "summary": "Unlink an IP pool from a silo", + "description": "Will fail if there are any outstanding IPs allocated in the silo.", + "operationId": "ip_pool_silo_unlink", + "parameters": [ + { + "in": "path", + "name": "pool", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/ip-pools-service": { "get": { "tags": [ @@ -12253,7 +12442,7 @@ ] }, "IpPool": { - "description": "Identity-related metadata that's included in nearly all public API objects", + "description": "A collection of IP ranges. If a pool is linked to a silo, IP addresses from the pool can be allocated within that silo", "type": "object", "properties": { "description": { @@ -12265,9 +12454,6 @@ "type": "string", "format": "uuid" }, - "is_default": { - "type": "boolean" - }, "name": { "description": "unique, mutable, user-controlled identifier for each resource", "allOf": [ @@ -12276,11 +12462,6 @@ } ] }, - "silo_id": { - "nullable": true, - "type": "string", - "format": "uuid" - }, "time_created": { "description": "timestamp when this resource was created", "type": "string", @@ -12295,7 +12476,6 @@ "required": [ "description", "id", - "is_default", "name", "time_created", "time_modified" @@ -12308,22 +12488,8 @@ "description": { "type": "string" }, - "is_default": { - "description": "Whether the IP pool is considered a default pool for its scope (fleet or silo). 
If a pool is marked default and is associated with a silo, instances created in that silo will draw IPs from that pool unless another pool is specified at instance create time.", - "default": false, - "type": "boolean" - }, "name": { "$ref": "#/components/schemas/Name" - }, - "silo": { - "nullable": true, - "description": "If an IP pool is associated with a silo, instance IP allocations in that silo can draw from that pool.", - "allOf": [ - { - "$ref": "#/components/schemas/NameOrId" - } - ] } }, "required": [ @@ -12399,6 +12565,78 @@ "items" ] }, + "IpPoolSilo": { + "description": "A link between an IP pool and a silo that allows one to allocate IPs from the pool within the silo", + "type": "object", + "properties": { + "ip_pool_id": { + "type": "string", + "format": "uuid" + }, + "is_default": { + "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. There can be at most one default for a given silo.", + "type": "boolean" + }, + "silo_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "ip_pool_id", + "is_default", + "silo_id" + ] + }, + "IpPoolSiloLink": { + "type": "object", + "properties": { + "is_default": { + "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. There can be at most one default for a given silo.", + "type": "boolean" + }, + "silo": { + "$ref": "#/components/schemas/NameOrId" + } + }, + "required": [ + "is_default", + "silo" + ] + }, + "IpPoolSiloResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/IpPoolSilo" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "IpPoolSiloUpdate": { + "type": "object", + "properties": { + "is_default": { + "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. 
There can be at most one default for a given silo, so when a pool is made default, an existing default will remain linked but will no longer be the default.", + "type": "boolean" + } + }, + "required": [ + "is_default" + ] + }, "IpPoolUpdate": { "description": "Parameters for updating an IP Pool", "type": "object", diff --git a/schema/crdb/23.0.0/up1.sql b/schema/crdb/23.0.0/up1.sql new file mode 100644 index 0000000000..28204a4d3b --- /dev/null +++ b/schema/crdb/23.0.0/up1.sql @@ -0,0 +1,3 @@ +CREATE TYPE IF NOT EXISTS omicron.public.ip_pool_resource_type AS ENUM ( + 'silo' +); diff --git a/schema/crdb/23.0.0/up2.sql b/schema/crdb/23.0.0/up2.sql new file mode 100644 index 0000000000..cf4668c325 --- /dev/null +++ b/schema/crdb/23.0.0/up2.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS omicron.public.ip_pool_resource ( + ip_pool_id UUID NOT NULL, + resource_type omicron.public.ip_pool_resource_type NOT NULL, + resource_id UUID NOT NULL, + is_default BOOL NOT NULL, + + PRIMARY KEY (ip_pool_id, resource_type, resource_id) +); diff --git a/schema/crdb/23.0.0/up3.sql b/schema/crdb/23.0.0/up3.sql new file mode 100644 index 0000000000..c345fd794e --- /dev/null +++ b/schema/crdb/23.0.0/up3.sql @@ -0,0 +1,5 @@ +CREATE UNIQUE INDEX IF NOT EXISTS one_default_ip_pool_per_resource ON omicron.public.ip_pool_resource ( + resource_id +) where + is_default = true; + diff --git a/schema/crdb/23.0.0/up4.sql b/schema/crdb/23.0.0/up4.sql new file mode 100644 index 0000000000..8fb43f9cf1 --- /dev/null +++ b/schema/crdb/23.0.0/up4.sql @@ -0,0 +1,38 @@ +-- Copy existing fleet-scoped pools over to the pool-silo join table +-- +-- Fleet-scoped pools are going away, but we recreate the equivalent of a fleet +-- link for existing fleet-scoped pools by associating them with every existing +-- silo, i.e., inserting a row into the association table for each (pool, silo) +-- pair. +set local disallow_full_table_scans = off; + +INSERT INTO omicron.public.ip_pool_resource (ip_pool_id, resource_type, resource_id, is_default) +SELECT + p.id AS ip_pool_id, + 'silo' AS resource_type, + s.id AS resource_id, + -- Special handling is required for conflicts between a fleet default and a + -- silo default. If pool P1 is a fleet default and pool P2 is a silo default + -- on silo S1, we cannot link both to S1 with is_default = true. What we + -- really want in that case is: + -- + -- row 1: (P1, S1, is_default=false) + -- row 2: (P2, S1, is_default=true) + -- + -- i.e., we want to link both, but have the silo default take precedence. The + -- AND NOT EXISTS here causes is_default to be false in row 1 if there is a + -- conflicting silo default pool. row 2 is inserted in up5. 
+ p.is_default AND NOT EXISTS ( + SELECT 1 FROM omicron.public.ip_pool + WHERE silo_id = s.id AND is_default + ) +FROM omicron.public.ip_pool AS p +-- cross join means we are looking at the cartesian product of all fleet-scoped +-- IP pools and all silos +CROSS JOIN omicron.public.silo AS s +WHERE p.time_deleted IS null + AND p.silo_id IS null -- means it's a fleet pool + AND s.time_deleted IS null +-- this makes it idempotent +ON CONFLICT (ip_pool_id, resource_type, resource_id) +DO NOTHING; diff --git a/schema/crdb/23.0.0/up5.sql b/schema/crdb/23.0.0/up5.sql new file mode 100644 index 0000000000..3c1b100c9b --- /dev/null +++ b/schema/crdb/23.0.0/up5.sql @@ -0,0 +1,13 @@ +-- Copy existing silo-scoped pools over to the pool-silo join table +INSERT INTO omicron.public.ip_pool_resource (ip_pool_id, resource_type, resource_id, is_default) +SELECT + id as ip_pool_id, + 'silo' as resource_type, + silo_id as resource_id, + is_default +FROM omicron.public.ip_pool AS ip +WHERE silo_id IS NOT null + AND time_deleted IS null +-- this makes it idempotent +ON CONFLICT (ip_pool_id, resource_type, resource_id) +DO NOTHING; diff --git a/schema/crdb/23.0.1/README.md b/schema/crdb/23.0.1/README.md new file mode 100644 index 0000000000..bd12f9883b --- /dev/null +++ b/schema/crdb/23.0.1/README.md @@ -0,0 +1 @@ +These steps are separated from 23.0.0 because they drop things that are used in previous steps, which causes the idempotence test to fail when it runs each migration multiple times (the things earlier steps rely on are no longer there). diff --git a/schema/crdb/23.0.1/up1.sql b/schema/crdb/23.0.1/up1.sql new file mode 100644 index 0000000000..3e5347e78c --- /dev/null +++ b/schema/crdb/23.0.1/up1.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS one_default_pool_per_scope; \ No newline at end of file diff --git a/schema/crdb/23.0.1/up2.sql b/schema/crdb/23.0.1/up2.sql new file mode 100644 index 0000000000..a62bda1adc --- /dev/null +++ b/schema/crdb/23.0.1/up2.sql @@ -0,0 +1,3 @@ +ALTER TABLE omicron.public.ip_pool + DROP COLUMN IF EXISTS silo_id, + DROP COLUMN IF EXISTS is_default; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 57ce791a03..e40c97972f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1566,29 +1566,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.ip_pool ( time_deleted TIMESTAMPTZ, /* The collection's child-resource generation number */ - rcgen INT8 NOT NULL, - - /* - * Association with a silo. silo_id is also used to mark an IP pool as - * "internal" by associating it with the oxide-internal silo. Null silo_id - * means the pool can be used fleet-wide. - */ - silo_id UUID, - - /* Is this the default pool for its scope (fleet or silo) */ - is_default BOOLEAN NOT NULL DEFAULT FALSE + rcgen INT8 NOT NULL ); -/* - * Ensure there can only be one default pool for the fleet or a given silo. - * Coalesce is needed because otherwise different nulls are considered to be - * distinct from each other. - */ -CREATE UNIQUE INDEX IF NOT EXISTS one_default_pool_per_scope ON omicron.public.ip_pool ( - COALESCE(silo_id, '00000000-0000-0000-0000-000000000000'::uuid) -) WHERE - is_default = true AND time_deleted IS NULL; - /* * Index ensuring uniqueness of IP Pool names, globally.
*/ @@ -1597,6 +1577,33 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_pool_by_name ON omicron.public.ip_pool ) WHERE time_deleted IS NULL; +-- The order here is most-specific first, and it matters because we use this +-- fact to select the most specific default in the case where there is both a +-- silo default and a fleet default. If we were to add a project type, it should +-- be added before silo. +CREATE TYPE IF NOT EXISTS omicron.public.ip_pool_resource_type AS ENUM ( + 'silo' +); + +-- join table associating IP pools with resources like fleet or silo +CREATE TABLE IF NOT EXISTS omicron.public.ip_pool_resource ( + ip_pool_id UUID NOT NULL, + resource_type omicron.public.ip_pool_resource_type NOT NULL, + resource_id UUID NOT NULL, + is_default BOOL NOT NULL, + -- TODO: timestamps for soft deletes? + + -- resource_type is redundant because resource IDs are globally unique, but + -- logically it belongs here + PRIMARY KEY (ip_pool_id, resource_type, resource_id) +); + +-- a given resource can only have one default ip pool +CREATE UNIQUE INDEX IF NOT EXISTS one_default_ip_pool_per_resource ON omicron.public.ip_pool_resource ( + resource_id +) where + is_default = true; + /* * IP Pools are made up of a set of IP ranges, which are start/stop addresses. * Note that these need not be CIDR blocks or well-behaved subnets with a @@ -3251,7 +3258,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '22.0.0', NULL) + ( TRUE, NOW(), NOW(), '23.0.1', NULL) ON CONFLICT DO NOTHING; COMMIT; From a0d2bc3000506711a0600f4995b5880d418eeed2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Jan 2024 11:46:12 -0800 Subject: [PATCH 146/186] Update Rust crate pretty-hex to 0.4.1 (#4758) --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3e8ad7495b..f65cb87448 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1850,7 +1850,7 @@ dependencies = [ "omicron-workspace-hack", "openapi-lint", "openapiv3", - "pretty-hex 0.4.0", + "pretty-hex 0.4.1", "schemars", "serde", "serde_json", @@ -6140,9 +6140,9 @@ checksum = "bc5c99d529f0d30937f6f4b8a86d988047327bb88d04d2c4afc356de74722131" [[package]] name = "pretty-hex" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23c6b968ed37d62e35b4febaba13bfa231b0b7929d68b8a94e65445a17e2d35f" +checksum = "bbc83ee4a840062f368f9096d80077a9841ec117e17e7f700df81958f1451254" [[package]] name = "pretty_assertions" diff --git a/Cargo.toml b/Cargo.toml index bc5ba0bc45..df0afd42e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -290,7 +290,7 @@ petgraph = "0.6.4" postgres-protocol = "0.6.6" predicates = "3.0.4" pretty_assertions = "1.4.0" -pretty-hex = "0.4.0" +pretty-hex = "0.4.1" prettyplease = "0.2.16" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } From f1ec5d8fc326a5331ed8964ff13abdbbf785cac9 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Jan 2024 11:46:21 -0800 Subject: [PATCH 147/186] Update Rust crate russh to 0.40.2 (#4760) --- end-to-end-tests/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 9e38112c36..5ff958dc9c 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -16,7 +16,7 @@ 
omicron-test-utils.workspace = true oxide-client.workspace = true rand.workspace = true reqwest.workspace = true -russh = "0.40.1" +russh = "0.40.2" russh-keys = "0.40.1" serde.workspace = true serde_json.workspace = true From 3a69cb0159764a4de932f3e3dd6da1385bf9095f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Jan 2024 11:46:35 -0800 Subject: [PATCH 148/186] Update Rust crate semver to 1.0.21 (#4761) --- Cargo.lock | 32 ++++++++++++++++---------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 8 ++++---- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f65cb87448..6c75adec36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -795,7 +795,7 @@ checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037" dependencies = [ "camino", "cargo-platform", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_json", "thiserror", @@ -2733,7 +2733,7 @@ dependencies = [ "once_cell", "pathdiff", "petgraph", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_json", "smallvec 1.11.2", @@ -4126,7 +4126,7 @@ dependencies = [ "rand 0.8.5", "ref-cast", "schemars", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_json", "sled-agent-client", @@ -4638,7 +4638,7 @@ dependencies = [ "regress", "reqwest", "schemars", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_human_bytes", "serde_json", @@ -4827,7 +4827,7 @@ dependencies = [ "rustls", "samael", "schemars", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_json", "serde_urlencoded", @@ -4915,7 +4915,7 @@ dependencies = [ "rayon", "reqwest", "ring 0.17.7", - "semver 1.0.20", + "semver 1.0.21", "serde", "sled-hardware", "slog", @@ -5016,7 +5016,7 @@ dependencies = [ "rcgen", "reqwest", "schemars", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_human_bytes", "serde_json", @@ -5159,7 +5159,7 @@ dependencies = [ "ring 0.17.7", "rustix 0.38.25", "schemars", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_json", "sha2", @@ -5208,7 +5208,7 @@ dependencies = [ "hex", "reqwest", "ring 0.16.20", - "semver 1.0.20", + "semver 1.0.21", "serde", "serde_derive", "tar", @@ -7036,7 +7036,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.20", + "semver 1.0.21", ] [[package]] @@ -7357,9 +7357,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" dependencies = [ "serde", ] @@ -7375,9 +7375,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.193" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" dependencies = [ "serde_derive", ] @@ -7413,9 +7413,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.193" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" dependencies = [ "proc-macro2", "quote", diff --git 
a/Cargo.toml b/Cargo.toml index df0afd42e1..e531af4661 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -318,7 +318,7 @@ rustyline = "12.0.0" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } schemars = "0.8.16" secrecy = "0.8.0" -semver = { version = "1.0.20", features = ["std", "serde"] } +semver = { version = "1.0.21", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index e42a95a824..5fb2d4a72c 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -86,8 +86,8 @@ regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } -semver = { version = "1.0.20", features = ["serde"] } -serde = { version = "1.0.193", features = ["alloc", "derive", "rc"] } +semver = { version = "1.0.21", features = ["serde"] } +serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } @@ -189,8 +189,8 @@ regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } -semver = { version = "1.0.20", features = ["serde"] } -serde = { version = "1.0.193", features = ["alloc", "derive", "rc"] } +semver = { version = "1.0.21", features = ["serde"] } +serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } From 25989e03add73ca16106a780069ca6553166fa14 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 5 Jan 2024 19:20:12 -0500 Subject: [PATCH 149/186] Bump web console (bump deps) (#4767) https://github.com/oxidecomputer/console/compare/02c6ce74...bcc80258 * [bcc80258](https://github.com/oxidecomputer/console/commit/bcc80258) oxidecomputer/console#1873 * [2961f641](https://github.com/oxidecomputer/console/commit/2961f641) TS 5.3 (+ bump Vite and MSW) * [6c76299f](https://github.com/oxidecomputer/console/commit/6c76299f) oxidecomputer/console#1868 * [84570912](https://github.com/oxidecomputer/console/commit/84570912) oxidecomputer/console#1869 * [81d03254](https://github.com/oxidecomputer/console/commit/81d03254) msw default page size 100 to match nexus --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 785c535e8d..b12bcdbc9f 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="02c6ce747fd5dd05e9d454ecb1bf70392c9d954e" -SHA2="39fd191993e147a569e28df86414e3d0f33963b7675474d7c522c3f685d4d4f0" +COMMIT="bcc80258f7ddd99f6cc4c94f8cc62c012d08acad" +SHA2="5d219bd7b2e5bd6a23985988be4f557bc79880fb307b1a55c1eed4b2927a8fd5" From f65ffd8d356fc727f4991e43590a1a0186093015 Mon Sep 17 00:00:00 2001 From: Rain 
Date: Fri, 5 Jan 2024 19:07:22 -0800 Subject: [PATCH 150/186] [test-utils] run clickhouse in its own cwd (#4755) ClickHouse likes to dump a file called `preprocessed_configs/config.xml` in its cwd. By default the cwd is the source tree, so sometimes if a test fails this file gets left behind. Fix this by moving the cwd to `/cwd`. I also made a couple other changes in the PR: 1. Switched to `Utf8TempDir` etc, simplifying path handling code which already assumed UTF-8 paths anyway. 2. Moved the datastore into its own directory, `/datastore`. I figured this would minimize possible collisions between the datastore and the cwd. Fixes #4080. Fixes #4634. --- dev-tools/omicron-dev/src/bin/omicron-dev.rs | 18 +- test-utils/src/dev/clickhouse.rs | 211 +++++++++++++------ 2 files changed, 155 insertions(+), 74 deletions(-) diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs index f8deae30d0..bbff4f6fe5 100644 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -302,7 +302,7 @@ async fn start_single_node(port: u16) -> Result<(), anyhow::Error> { ); println!( "omicron-dev: using {} for ClickHouse data storage", - db_instance.data_path().display() + db_instance.data_path() ); // Wait for the DB to exit itself (an error), or for SIGINT @@ -381,39 +381,39 @@ async fn start_replicated_cluster() -> Result<(), anyhow::Error> { ); println!( "omicron-dev: using {} and {} for ClickHouse data storage", - cluster.replica_1.data_path().display(), - cluster.replica_2.data_path().display() + cluster.replica_1.data_path(), + cluster.replica_2.data_path() ); // Wait for the replicas and keepers to exit themselves (an error), or for SIGINT tokio::select! { _ = cluster.replica_1.wait_for_shutdown() => { cluster.replica_1.cleanup().await.context( - format!("clean up {} after shutdown", cluster.replica_1.data_path().display()) + format!("clean up {} after shutdown", cluster.replica_1.data_path()) )?; bail!("omicron-dev: ClickHouse replica 1 shutdown unexpectedly"); } _ = cluster.replica_2.wait_for_shutdown() => { cluster.replica_2.cleanup().await.context( - format!("clean up {} after shutdown", cluster.replica_2.data_path().display()) + format!("clean up {} after shutdown", cluster.replica_2.data_path()) )?; bail!("omicron-dev: ClickHouse replica 2 shutdown unexpectedly"); } _ = cluster.keeper_1.wait_for_shutdown() => { cluster.keeper_1.cleanup().await.context( - format!("clean up {} after shutdown", cluster.keeper_1.data_path().display()) + format!("clean up {} after shutdown", cluster.keeper_1.data_path()) )?; bail!("omicron-dev: ClickHouse keeper 1 shutdown unexpectedly"); } _ = cluster.keeper_2.wait_for_shutdown() => { cluster.keeper_2.cleanup().await.context( - format!("clean up {} after shutdown", cluster.keeper_2.data_path().display()) + format!("clean up {} after shutdown", cluster.keeper_2.data_path()) )?; bail!("omicron-dev: ClickHouse keeper 2 shutdown unexpectedly"); } _ = cluster.keeper_3.wait_for_shutdown() => { cluster.keeper_3.cleanup().await.context( - format!("clean up {} after shutdown", cluster.keeper_3.data_path().display()) + format!("clean up {} after shutdown", cluster.keeper_3.data_path()) )?; bail!("omicron-dev: ClickHouse keeper 3 shutdown unexpectedly"); } @@ -436,7 +436,7 @@ async fn start_replicated_cluster() -> Result<(), anyhow::Error> { instance .wait_for_shutdown() .await - .context(format!("clean up {} after SIGINT shutdown", instance.data_path().display()))?; + 
.context(format!("clean up {} after SIGINT shutdown", instance.data_path()))?; }; } } diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index 011de576ca..c73161eec7 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -9,8 +9,9 @@ use std::process::Stdio; use std::time::Duration; use anyhow::{anyhow, Context}; +use camino::{Utf8Path, Utf8PathBuf}; +use camino_tempfile::Utf8TempDir; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; -use tempfile::{Builder, TempDir}; use thiserror::Error; use tokio::{ fs::File, @@ -30,8 +31,8 @@ const CLICKHOUSE_KEEPER_TIMEOUT: Duration = Duration::from_secs(30); #[derive(Debug)] pub struct ClickHouseInstance { // Directory in which all data, logs, etc are stored. - data_dir: Option, - data_path: PathBuf, + data_dir: Option, + data_path: Utf8PathBuf, // The HTTP port the server is listening on port: u16, // The address the server is listening on @@ -63,21 +64,18 @@ pub enum ClickHouseError { impl ClickHouseInstance { /// Start a new single node ClickHouse server on the given IPv6 port. pub async fn new_single_node(port: u16) -> Result { - let data_dir = TempDir::new() - .context("failed to create tempdir for ClickHouse data")?; - let log_path = data_dir.path().join("clickhouse-server.log"); - let err_log_path = data_dir.path().join("clickhouse-server.errlog"); + let data_dir = ClickHouseDataDir::new()?; let args = vec![ "server".to_string(), "--log-file".to_string(), - log_path.display().to_string(), + data_dir.log_path().to_string(), "--errorlog-file".to_string(), - err_log_path.display().to_string(), + data_dir.err_log_path().to_string(), "--".to_string(), "--http_port".to_string(), format!("{}", port), "--path".to_string(), - data_dir.path().display().to_string(), + data_dir.datastore_path().to_string(), ]; let child = tokio::process::Command::new("clickhouse") @@ -87,6 +85,7 @@ impl ClickHouseInstance { .stdin(Stdio::null()) .stdout(Stdio::null()) .stderr(Stdio::null()) + .current_dir(data_dir.cwd_path()) // By default ClickHouse forks a child if it's been explicitly // requested via the following environment variable, _or_ if it's // not attached to a TTY. 
Avoid this behavior, so that we can @@ -99,8 +98,8 @@ impl ClickHouseInstance { format!("failed to spawn `clickhouse` (with args: {:?})", &args) })?; - let data_path = data_dir.path().to_path_buf(); - let port = wait_for_port(log_path).await?; + let data_path = data_dir.root_path().to_path_buf(); + let port = wait_for_port(data_dir.log_path()).await?; let address = SocketAddr::new(IpAddr::V6(Ipv6Addr::LOCALHOST), port); @@ -123,14 +122,7 @@ r_number: String, config_path: PathBuf, ) -> Result<Self, anyhow::Error> { - let data_dir = TempDir::new() - .context("failed to create tempdir for ClickHouse data")?; - let log_path = data_dir.path().join("clickhouse-server.log"); - let err_log_path = data_dir.path().join("clickhouse-server.errlog"); - let tmp_path = data_dir.path().join("tmp/"); - let user_files_path = data_dir.path().join("user_files/"); - let access_path = data_dir.path().join("access/"); - let format_schemas_path = data_dir.path().join("format_schemas/"); + let data_dir = ClickHouseDataDir::new()?; let args = vec![ "server".to_string(), "--config-file".to_string(), @@ -142,19 +134,20 @@ .stdin(Stdio::null()) .stdout(Stdio::null()) .stderr(Stdio::null()) + .current_dir(data_dir.cwd_path()) .env("CLICKHOUSE_WATCHDOG_ENABLE", "0") - .env("CH_LOG", &log_path) - .env("CH_ERROR_LOG", err_log_path) + .env("CH_LOG", data_dir.log_path()) + .env("CH_ERROR_LOG", data_dir.err_log_path()) .env("CH_REPLICA_DISPLAY_NAME", name) .env("CH_LISTEN_ADDR", "::") .env("CH_LISTEN_PORT", port.to_string()) .env("CH_TCP_PORT", tcp_port.to_string()) .env("CH_INTERSERVER_PORT", interserver_port.to_string()) - .env("CH_DATASTORE", data_dir.path()) - .env("CH_TMP_PATH", tmp_path) - .env("CH_USER_FILES_PATH", user_files_path) - .env("CH_USER_LOCAL_DIR", access_path) - .env("CH_FORMAT_SCHEMA_PATH", format_schemas_path) + .env("CH_DATASTORE", data_dir.datastore_path()) + .env("CH_TMP_PATH", data_dir.tmp_path()) + .env("CH_USER_FILES_PATH", data_dir.user_files_path()) + .env("CH_USER_LOCAL_DIR", data_dir.access_path()) + .env("CH_FORMAT_SCHEMA_PATH", data_dir.format_schemas_path()) + .env("CH_REPLICA_NUMBER", r_number) .env("CH_REPLICA_HOST_01", "::1") .env("CH_REPLICA_HOST_02", "::1") @@ -169,10 +162,10 @@ format!("failed to spawn `clickhouse` (with args: {:?})", &args) })?; - let data_path = data_dir.path().to_path_buf(); + let data_path = data_dir.root_path().to_path_buf(); let address = SocketAddr::new(IpAddr::V6(Ipv6Addr::LOCALHOST), port); - let result = wait_for_ready(log_path).await; + let result = wait_for_ready(data_dir.log_path()).await; match result { Ok(()) => Ok(Self { data_dir: Some(data_dir), @@ -198,17 +191,8 @@ if ![1, 2, 3].contains(&k_id) { return Err(ClickHouseError::InvalidKeeperId.into()); } - // Keepers do not allow a dot in the beginning of the directory, so we must - // use a prefix.
- let data_dir = Builder::new() .prefix("k") .tempdir() .context("failed to create tempdir for ClickHouse Keeper data")?; - - let log_path = data_dir.path().join("clickhouse-keeper.log"); - let err_log_path = data_dir.path().join("clickhouse-keeper.err.log"); - let log_storage_path = data_dir.path().join("log"); - let snapshot_storage_path = data_dir.path().join("snapshots"); + let data_dir = ClickHouseDataDir::new()?; + let args = vec![ "keeper".to_string(), "--config-file".to_string(), @@ -221,14 +205,17 @@ .stdout(Stdio::null()) .stderr(Stdio::null()) .env("CLICKHOUSE_WATCHDOG_ENABLE", "0") - .env("CH_LOG", &log_path) - .env("CH_ERROR_LOG", err_log_path) + .env("CH_LOG", data_dir.keeper_log_path()) + .env("CH_ERROR_LOG", data_dir.keeper_err_log_path()) .env("CH_LISTEN_ADDR", "::") .env("CH_LISTEN_PORT", port.to_string()) .env("CH_KEEPER_ID_CURRENT", k_id.to_string()) - .env("CH_DATASTORE", data_dir.path()) - .env("CH_LOG_STORAGE_PATH", log_storage_path) - .env("CH_SNAPSHOT_STORAGE_PATH", snapshot_storage_path) + .env("CH_DATASTORE", data_dir.datastore_path()) + .env("CH_LOG_STORAGE_PATH", data_dir.keeper_log_storage_path()) + .env( + "CH_SNAPSHOT_STORAGE_PATH", + data_dir.keeper_snapshot_storage_path(), + ) .env("CH_KEEPER_ID_01", "1") .env("CH_KEEPER_ID_02", "2") .env("CH_KEEPER_ID_03", "3") @@ -243,10 +230,10 @@ ) })?; - let data_path = data_dir.path().to_path_buf(); + let data_path = data_dir.root_path().to_path_buf(); let address = SocketAddr::new(IpAddr::V6(Ipv6Addr::LOCALHOST), port); - let result = wait_for_ready(log_path).await; + let result = wait_for_ready(data_dir.keeper_log_path()).await; match result { Ok(()) => Ok(Self { data_dir: Some(data_dir), @@ -275,17 +262,13 @@ child.wait().await.context("waiting for child")?; } if let Some(dir) = self.data_dir.take() { - dir.close().context("Cleaning up temporary directory")?; - - // ClickHouse doesn't fully respect the `--path` flag, and still seems - // to put the `preprocessed_configs` directory in $CWD. - let _ = std::fs::remove_dir_all("./preprocessed_configs"); + dir.close()?; } Ok(()) } /// Return the full path to the directory used for the server's data. - pub fn data_path(&self) -> &Path { + pub fn data_path(&self) -> &Utf8Path { &self.data_path } @@ -305,6 +288,98 @@ } } +#[derive(Debug)] +struct ClickHouseDataDir { + dir: Utf8TempDir, +} + +impl ClickHouseDataDir { + fn new() -> Result<Self, anyhow::Error> { + // Keepers do not allow a dot in the beginning of the directory, so we must + // use a prefix. + let dir = Utf8TempDir::with_prefix("clickhouse-") + .context("failed to create tempdir for ClickHouse data")?; + + let ret = Self { dir }; + // Create some of the directories. We specify a custom cwd because + // clickhouse doesn't always respect the --path flag and stores, in + // particular, files in `preprocessed_configs/`.
+ std::fs::create_dir(ret.datastore_path()) + .context("failed to create datastore directory")?; + std::fs::create_dir(ret.cwd_path()) + .context("failed to create cwd directory")?; + std::fs::create_dir(ret.keeper_log_storage_path()) + .context("failed to create keeper log directory")?; + std::fs::create_dir(ret.keeper_snapshot_storage_path()) + .context("failed to create keeper snapshot directory")?; + + Ok(ret) + } + + fn root_path(&self) -> &Utf8Path { + self.dir.path() + } + + fn datastore_path(&self) -> Utf8PathBuf { + self.dir.path().join("datastore/") + } + + fn cwd_path(&self) -> Utf8PathBuf { + self.dir.path().join("cwd/") + } + + fn log_path(&self) -> Utf8PathBuf { + self.dir.path().join("clickhouse-server.log") + } + + fn err_log_path(&self) -> Utf8PathBuf { + self.dir.path().join("clickhouse-server.errlog") + } + + fn tmp_path(&self) -> Utf8PathBuf { + self.dir.path().join("tmp/") + } + + fn user_files_path(&self) -> Utf8PathBuf { + self.dir.path().join("user_files/") + } + + fn access_path(&self) -> Utf8PathBuf { + self.dir.path().join("access/") + } + + fn format_schemas_path(&self) -> Utf8PathBuf { + self.dir.path().join("format_schemas/") + } + + fn keeper_log_path(&self) -> Utf8PathBuf { + self.dir.path().join("clickhouse-keeper.log") + } + + fn keeper_err_log_path(&self) -> Utf8PathBuf { + self.dir.path().join("clickhouse-keeper.errlog") + } + + fn keeper_log_storage_path(&self) -> Utf8PathBuf { + // ClickHouse keeper chokes on log paths having trailing slashes, + // producing messages like: + // + // Application: DB::Exception: Invalid changelog + // /tmp/clickhouse-lSv3IU/log/uuid + // + // So we don't include a trailing slash for this specific path. + self.dir.path().join("log") + } + + fn keeper_snapshot_storage_path(&self) -> Utf8PathBuf { + self.dir.path().join("snapshots/") + } + + fn close(self) -> Result<(), anyhow::Error> { + self.dir.close().context("failed to delete ClickHouse data dir") + } +} + impl Drop for ClickHouseInstance { fn drop(&mut self) { if self.child.is_some() || self.data_dir.is_some() { @@ -440,7 +515,9 @@ impl ClickHouseCluster { // _learn_ the port, which introduces the possibility that we return // from this function successfully, but the server itself is not yet // ready to accept connections. -pub async fn wait_for_port(log_path: PathBuf) -> Result<u16, anyhow::Error> { +pub async fn wait_for_port( + log_path: Utf8PathBuf, +) -> Result<u16, anyhow::Error> { let p = poll::wait_for_condition( || async { let result = @@ -476,7 +553,7 @@ pub async fn wait_for_port(log_path: PathBuf) -> Result<u16, anyhow::Error> { // Parse the ClickHouse log file at the given path, looking for a line reporting the port number of // the HTTP server. This is only used if the port is chosen by the OS, not the caller. async fn discover_local_listening_port( - path: &Path, + path: &Utf8Path, timeout: Duration, ) -> Result<u16, ClickHouseError> { let timeout = Instant::now() + timeout; @@ -490,7 +567,7 @@ async fn discover_local_listening_port( // NOTE: This function loops forever until the expected line is found. It should be run under a // timeout, or some other mechanism for cancelling it.
async fn find_clickhouse_port_in_log( - path: &Path, + path: &Utf8Path, ) -> Result<u16, ClickHouseError> { let mut reader = BufReader::new(File::open(path).await?); const NEEDLE: &str = @@ -527,7 +604,9 @@ async fn find_clickhouse_port_in_log( } // Wait for the ClickHouse log file to report it is ready to receive connections -pub async fn wait_for_ready(log_path: PathBuf) -> Result<(), anyhow::Error> { +pub async fn wait_for_ready( + log_path: Utf8PathBuf, +) -> Result<(), anyhow::Error> { let p = poll::wait_for_condition( || async { let result = @@ -561,7 +640,7 @@ pub async fn wait_for_ready(log_path: PathBuf) -> Result<(), anyhow::Error> { // Parse the ClickHouse log file at the given path, looking for a line reporting that the server // is ready for connections. async fn discover_ready( - path: &Path, + path: &Utf8Path, timeout: Duration, ) -> Result<(), ClickHouseError> { let timeout = Instant::now() + timeout; @@ -574,7 +653,9 @@ async fn discover_ready( // // NOTE: This function loops forever until the expected line is found. It should be run under a // timeout, or some other mechanism for cancelling it. -async fn clickhouse_ready_from_log(path: &Path) -> Result<(), ClickHouseError> { +async fn clickhouse_ready_from_log( + path: &Utf8Path, +) -> Result<(), ClickHouseError> { let mut reader = BufReader::new(File::open(path).await?); const READY: &str = " Application: Ready for connections"; let mut lines = reader.lines(); @@ -605,9 +686,9 @@ mod tests { use super::{ discover_local_listening_port, discover_ready, ClickHouseError, CLICKHOUSE_TIMEOUT, }; + use camino_tempfile::NamedUtf8TempFile; use std::process::Stdio; use std::{io::Write, sync::Arc, time::Duration}; - use tempfile::NamedTempFile; use tokio::{sync::Mutex, task::spawn, time::sleep}; const EXPECTED_PORT: u16 = 12345; @@ -626,7 +707,7 @@ #[tokio::test] async fn test_discover_local_listening_port() { // Write some data to a fake log file - let mut file = NamedTempFile::new().unwrap(); + let mut file = NamedUtf8TempFile::new().unwrap(); writeln!(file, "A garbage line").unwrap(); writeln!( file, @@ -648,7 +729,7 @@ #[tokio::test] async fn test_discover_clickhouse_ready() { // Write some data to a fake log file - let mut file = NamedTempFile::new().unwrap(); + let mut file = NamedUtf8TempFile::new().unwrap(); writeln!(file, "A garbage line").unwrap(); writeln!( file, @@ -667,7 +748,7 @@ #[tokio::test] async fn test_discover_clickhouse_not_ready() { // Write some data to a fake log file - let mut file = NamedTempFile::new().unwrap(); + let mut file = NamedUtf8TempFile::new().unwrap(); writeln!(file, "A garbage line").unwrap(); writeln!( file, @@ -716,7 +797,7 @@ writer_interval: Duration, ) -> Result { async fn write_and_wait( - file: &mut NamedTempFile, + file: &mut NamedUtf8TempFile, line: String, interval: Duration, ) { @@ -738,7 +819,7 @@ // the `NamedTempFile`. If the owning task completes, that may delete the file before the // other task accesses it. So we need interior mutability (because one of the references is // mutable for writing), and _this_ scope must own it.
- let file = Arc::new(Mutex::new(NamedTempFile::new()?)); + let file = Arc::new(Mutex::new(NamedUtf8TempFile::new()?)); let path = file.lock().await.path().to_path_buf(); let writer_file = file.clone(); let writer_task = spawn(async move { From c4a1e458518a6422add09165c9ba55d6ee0bb633 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 06:48:37 +0000 Subject: [PATCH 151/186] Update taiki-e/install-action digest to c63cad0 (#4768) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`115b656` -> `c63cad0`](https://togithub.com/taiki-e/install-action/compare/115b656...c63cad0) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 0627ea1563..f30775f819 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@115b656342518960cf3dbf5c01f62b684985ca11 # v2 + uses: taiki-e/install-action@c63cad0540fb5357c70e2481e3da40d7649add24 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 590eb5875389751b2330dc7ee9f4720a09d974ca Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 12:43:28 -0800 Subject: [PATCH 152/186] Update Rust crate serde_path_to_error to 0.1.15 (#4770) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c75adec36..cf77d72617 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7455,9 +7455,9 @@ dependencies = [ [[package]] name = "serde_path_to_error" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4beec8bce849d58d06238cb50db2e1c417cfeafa4c63f692b15c82b7c80f8335" +checksum = "ebd154a240de39fdebcf5775d2675c204d7c13cf39a4c697be6493c8e734337c" dependencies = [ "itoa", "serde", diff --git a/Cargo.toml b/Cargo.toml index e531af4661..17a10e2306 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -323,7 +323,7 @@ serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } serde_json = "1.0.108" -serde_path_to_error = "0.1.14" +serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" serde_with = "3.4.0" From cde9b1547ea928c1b6ce0c11bfd09690c2502549 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 12:43:44 -0800 Subject: [PATCH 153/186] Update Rust crate tokio to 1.35.1 (#4772) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf77d72617..eac227892a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8724,9 +8724,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.35.0" +version = "1.35.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c" +checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" dependencies = [ "backtrace", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 17a10e2306..1124046702 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -371,7 +371,7 @@ textwrap = "0.16.0" test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.35.0" +tokio = "1.35.1" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" tokio-tungstenite = "0.20" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 5fb2d4a72c..b16310d478 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -99,7 +99,7 @@ subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.35.0", features = ["full", "test-util"] } +tokio = { version = "1.35.1", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } @@ -203,7 +203,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.35.0", features = ["full", "test-util"] } +tokio = { version = "1.35.1", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } From 688167a0a3059c90db8fa1b3bbaf178873bd90af Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 8 Jan 2024 18:04:52 -0800 Subject: [PATCH 154/186] [tufaceous-lib] make DeserializedManifest public and serializable, fix serialization (#4781) This is prep work for #4690. I'd like to add support for making changes to a DeserializedManifest, then serializing it. However, it turned out that the bytesize crate does not serialize bytes correctly (https://github.com/hyunsik/bytesize/issues/40). To address this, just use a u64 and write our own deserializer, using the alternative parse-size crate. 
Also add a test to ensure that serialization of the fake manifest roundtrips correctly. --- Cargo.lock | 17 ++- Cargo.toml | 2 +- tufaceous-lib/Cargo.toml | 2 +- tufaceous-lib/src/assemble/manifest.rs | 159 ++++++++++++++++++++----- 4 files changed, 141 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eac227892a..d9d85bdc79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -707,15 +707,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bytesize" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" -dependencies = [ - "serde", -] - [[package]] name = "bzip2" version = "0.4.4" @@ -5709,6 +5700,12 @@ dependencies = [ "syn 2.0.46", ] +[[package]] +name = "parse-size" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "944553dd59c802559559161f9816429058b869003836120e262e8caec061b7ae" + [[package]] name = "partial-io" version = "0.5.4" @@ -9137,7 +9134,6 @@ dependencies = [ "async-trait", "buf-list", "bytes", - "bytesize", "camino", "camino-tempfile", "chrono", @@ -9151,6 +9147,7 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-workspace-hack", + "parse-size", "rand 0.8.5", "ring 0.17.7", "serde", diff --git a/Cargo.toml b/Cargo.toml index 1124046702..f08a1691ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,7 +160,6 @@ bootstrap-agent-client = { path = "clients/bootstrap-agent-client" } buf-list = { version = "1.0.3", features = ["tokio1"] } byteorder = "1.5.0" bytes = "1.5.0" -bytesize = "1.3.0" camino = "1.1" camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" @@ -283,6 +282,7 @@ oximeter-producer = { path = "oximeter/producer" } p256 = "0.13" parse-display = "0.8.2" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } +parse-size = "1.0.0" paste = "1.0.14" percent-encoding = "2.3.1" pem = "3.0" diff --git a/tufaceous-lib/Cargo.toml b/tufaceous-lib/Cargo.toml index aa9a26e3bb..7f67d21f35 100644 --- a/tufaceous-lib/Cargo.toml +++ b/tufaceous-lib/Cargo.toml @@ -10,7 +10,6 @@ anyhow = { workspace = true, features = ["backtrace"] } async-trait.workspace = true buf-list.workspace = true bytes.workspace = true -bytesize = { workspace = true, features = ["serde"] } camino.workspace = true camino-tempfile.workspace = true chrono.workspace = true @@ -22,6 +21,7 @@ hex.workspace = true hubtools.workspace = true itertools.workspace = true omicron-common.workspace = true +parse-size.workspace = true rand.workspace = true ring = { workspace = true, features = ["std"] } serde.workspace = true diff --git a/tufaceous-lib/src/assemble/manifest.rs b/tufaceous-lib/src/assemble/manifest.rs index 437b84e7b0..35bc3f5930 100644 --- a/tufaceous-lib/src/assemble/manifest.rs +++ b/tufaceous-lib/src/assemble/manifest.rs @@ -5,18 +5,21 @@ use std::collections::{BTreeMap, BTreeSet}; use anyhow::{bail, ensure, Context, Result}; -use bytesize::ByteSize; use camino::{Utf8Path, Utf8PathBuf}; use omicron_common::api::{ external::SemverVersion, internal::nexus::KnownArtifactKind, }; -use serde::Deserialize; +use parse_size::parse_size; +use serde::{Deserialize, Serialize}; use crate::{ make_filler_text, ArtifactSource, CompositeControlPlaneArchiveBuilder, CompositeHostArchiveBuilder, CompositeRotArchiveBuilder, }; +static FAKE_MANIFEST_TOML: &str = + include_str!("../../../tufaceous/manifests/fake.toml"); + /// A list of components in a TUF repo representing a single update. 
#[derive(Clone, Debug)] pub struct ArtifactManifest { @@ -36,10 +39,15 @@ impl ArtifactManifest { /// Deserializes a manifest from an input string. pub fn from_str(base_dir: &Utf8Path, input: &str) -> Result { - let de = toml::Deserializer::new(input); - let manifest: DeserializedManifest = - serde_path_to_error::deserialize(de)?; + let manifest = DeserializedManifest::from_str(input)?; + Self::from_deserialized(base_dir, manifest) + } + /// Creates a manifest from a [`DeserializedManifest`]. + pub fn from_deserialized( + base_dir: &Utf8Path, + manifest: DeserializedManifest, + ) -> Result { // Replace all paths in the deserialized manifest with absolute ones, // and do some processing to support flexible manifests: // @@ -58,6 +66,7 @@ impl ArtifactManifest { // `KnownArtifactKind`s. It would be nicer to enforce this more // statically and let serde do these checks, but that seems relatively // tricky in comparison to these checks. + Ok(ArtifactManifest { system_version: manifest.system_version, artifacts: manifest @@ -88,7 +97,7 @@ impl ArtifactManifest { kind, &data.version, ) - .make_data(size.0 as usize); + .make_data(size as usize); ArtifactSource::Memory(fake_data.into()) } DeserializedArtifactSource::CompositeHost { @@ -210,8 +219,6 @@ impl ArtifactManifest { /// Returns a fake manifest. Useful for testing. pub fn new_fake() -> Self { - static FAKE_MANIFEST_TOML: &str = - include_str!("../../../tufaceous/manifests/fake.toml"); // The base directory doesn't matter for fake manifests. Self::from_str(".".into(), FAKE_MANIFEST_TOML) .expect("the fake manifest is a valid manifest") @@ -298,30 +305,51 @@ pub struct ArtifactData { /// we don't expose the `Deserialize` impl on `ArtifactManifest, forcing /// consumers to go through [`ArtifactManifest::from_path`] or /// [`ArtifactManifest::from_str`]. -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(rename_all = "snake_case")] -struct DeserializedManifest { - system_version: SemverVersion, +pub struct DeserializedManifest { + pub system_version: SemverVersion, #[serde(rename = "artifact")] - artifacts: BTreeMap>, + pub artifacts: BTreeMap>, } -#[derive(Clone, Debug, Deserialize)] +impl DeserializedManifest { + pub fn from_path(path: &Utf8Path) -> Result { + let input = fs_err::read_to_string(path)?; + Self::from_str(&input).with_context(|| { + format!("error deserializing manifest from {path}") + }) + } + + pub fn from_str(input: &str) -> Result { + let de = toml::Deserializer::new(input); + serde_path_to_error::deserialize(de) + .context("error deserializing manifest") + } + + /// Returns the fake manifest. 
+ pub fn fake() -> Self { + Self::from_str(FAKE_MANIFEST_TOML).unwrap() + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(rename_all = "snake_case")] -struct DeserializedArtifactData { +pub struct DeserializedArtifactData { pub name: String, pub version: SemverVersion, pub source: DeserializedArtifactSource, } -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(tag = "kind", rename_all = "kebab-case")] -enum DeserializedArtifactSource { +pub enum DeserializedArtifactSource { File { path: Utf8PathBuf, }, Fake { - size: ByteSize, + #[serde(deserialize_with = "deserialize_byte_size")] + size: u64, }, CompositeHost { phase_1: DeserializedFileArtifactSource, @@ -336,11 +364,16 @@ enum DeserializedArtifactSource { }, } -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(tag = "kind", rename_all = "snake_case")] -enum DeserializedFileArtifactSource { - File { path: Utf8PathBuf }, - Fake { size: ByteSize }, +pub enum DeserializedFileArtifactSource { + File { + path: Utf8PathBuf, + }, + Fake { + #[serde(deserialize_with = "deserialize_byte_size")] + size: u64, + }, } impl DeserializedFileArtifactSource { @@ -354,18 +387,24 @@ impl DeserializedFileArtifactSource { .with_context(|| format!("failed to read {path}"))? } DeserializedFileArtifactSource::Fake { size } => { - fake_attr.make_data(size.0 as usize) + fake_attr.make_data(*size as usize) } }; f(data) } } -#[derive(Clone, Debug, Deserialize, serde::Serialize)] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(tag = "kind", rename_all = "snake_case")] -enum DeserializedControlPlaneZoneSource { - File { path: Utf8PathBuf }, - Fake { name: String, size: ByteSize }, +pub enum DeserializedControlPlaneZoneSource { + File { + path: Utf8PathBuf, + }, + Fake { + name: String, + #[serde(deserialize_with = "deserialize_byte_size")] + size: u64, + }, } impl DeserializedControlPlaneZoneSource { @@ -383,10 +422,76 @@ impl DeserializedControlPlaneZoneSource { (name, data) } DeserializedControlPlaneZoneSource::Fake { name, size } => { - let data = make_filler_text(size.0 as usize); + let data = make_filler_text(*size as usize); (name.as_str(), data) } }; f(name, data) } } + +fn deserialize_byte_size<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + // Attempt to deserialize the size as either a string or an integer. + + struct Visitor; + + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = u64; + + fn expecting( + &self, + formatter: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + formatter + .write_str("a string representing a byte size or an integer") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + parse_size(value).map_err(|_| { + serde::de::Error::invalid_value( + serde::de::Unexpected::Str(value), + &self, + ) + }) + } + + // TOML uses i64, not u64 + fn visit_i64(self, value: i64) -> Result + where + E: serde::de::Error, + { + Ok(value as u64) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + Ok(value) + } + } + + deserializer.deserialize_any(Visitor) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Ensure that the fake manifest roundtrips after serialization and + // deserialization. 
+ #[test] + fn fake_roundtrip() { + let manifest = DeserializedManifest::fake(); + let toml = toml::to_string(&manifest).unwrap(); + let deserialized = DeserializedManifest::from_str(&toml) + .expect("fake manifest is a valid manifest"); + assert_eq!(manifest, deserialized); + } +} From 820f6ee8a27b916d3c2a40b5092b537812a767c5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:14:25 +0000 Subject: [PATCH 155/186] Update Rust crate serde_json to 1.0.111 (#4769) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d9d85bdc79..65e58d39fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7441,9 +7441,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index f08a1691ca..ba5bdb010b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -322,7 +322,7 @@ semver = { version = "1.0.21", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.108" +serde_json = "1.0.111" serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b16310d478..cff10b60ce 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -88,7 +88,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -191,7 +191,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 150954297f839205b2838f169b926ce0893b4583 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:44:46 +0000 Subject: [PATCH 156/186] Update Rust crate trybuild to 1.0.88 (#4773) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 
deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65e58d39fd..3db966876d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -417,9 +417,9 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "basic-toml" -version = "0.1.4" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bfc506e7a2370ec239e1d072507b2a80c833083699d3c6fa176fbb4de8448c6" +checksum = "2db21524cad41c5591204d22d75e1970a2d1f71060214ca931dc7d5afe2c14e5" dependencies = [ "serde", ] @@ -9087,9 +9087,9 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" [[package]] name = "trybuild" -version = "1.0.85" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "196a58260a906cedb9bf6d8034b6379d0c11f552416960452f267402ceeddff1" +checksum = "76de4f783e610194f6c98bfd53f9fc52bb2e0d02c947621e8a0f4ecc799b2880" dependencies = [ "basic-toml", "glob", diff --git a/Cargo.toml b/Cargo.toml index ba5bdb010b..8553a7244b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -384,7 +384,7 @@ trust-dns-client = "0.22" trust-dns-proto = "0.22" trust-dns-resolver = "0.22" trust-dns-server = "0.22" -trybuild = "1.0.85" +trybuild = "1.0.88" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } unicode-width = "0.1.11" From 9fe8a3cf614a3430365e9205bb24ae0940e5d959 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 9 Jan 2024 03:05:59 -0800 Subject: [PATCH 157/186] [tests] time out tests in test_all after a duration (#4780) We're seeing some tests in `test_all` hang forever (#4779). Set a reasonable upper bound on test duration. This will also cause stdout and stderr for failing tests to be printed. Doing so on SIGTERM in general is tracked at https://github.com/nextest-rs/nextest/issues/1208. Also, bump up the required nextest version to 0.9.64 to make use of the `binary_id` predicate. --- .config/nextest.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 4f927d2396..136a21a236 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,7 @@ # # The required version should be bumped up if we need new features, performance # improvements or bugfixes that are present in newer versions of nextest. -nextest-version = { required = "0.9.59", recommended = "0.9.64" } +nextest-version = { required = "0.9.64", recommended = "0.9.64" } experimental = ["setup-scripts"] @@ -32,3 +32,9 @@ clickhouse-cluster = { max-threads = 1 } [[profile.default.overrides]] filter = 'package(oximeter-db) and test(replicated)' test-group = 'clickhouse-cluster' + +[[profile.ci.overrides]] +filter = 'binary_id(omicron-nexus::test_all)' +# As of 2023-01-08, the slowest test in test_all takes 196s on a Ryzen 7950X. +# 900s is a good upper limit that adds a comfortable buffer. 
+slow-timeout = { period = '60s', terminate-after = 15 } From a6245c414fd5d49c669379db44baf028c14eaa66 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 11:54:35 +0000 Subject: [PATCH 158/186] Update taiki-e/install-action digest to 2f4c386 (#4784) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`c63cad0` -> `2f4c386`](https://togithub.com/taiki-e/install-action/compare/c63cad0...2f4c386) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index f30775f819..f758bd79b9 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@c63cad0540fb5357c70e2481e3da40d7649add24 # v2 + uses: taiki-e/install-action@2f4c386a81aeab009d470320dfc6e0930ee4e064 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From e4522e52a720cc4721600a03cf924c3bf043a883 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 9 Jan 2024 14:35:59 -0800 Subject: [PATCH 159/186] [schema][test] Add a data migration validation test (#4783) Adds a schema test with "before" / "after" hooks, and adds an example specifically for the "23.0.0" migration. My intent is that this can be used for any other schema migrations that would like to execute arbitrary SQL checks against the new schema too. 
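As a sketch of what registering such a check would look like for some future migration (the version number, table name, and hook names here are hypothetical; the `BeforeFn`/`AfterFn` aliases, `DataMigrationFns`, and `get_migration_checks` are all defined in the diff below):

```rust
// Hypothetical check for a future version "99.0.0" -- only the 23.0.0
// check in this PR is real. The `before` hook seeds rows the migration
// should rewrite; the `after` hook asserts on the rewritten state.
fn before_99_0_0(client: &Client) -> BoxFuture<'_, ()> {
    Box::pin(async move {
        client
            .batch_execute("INSERT INTO some_table (id) VALUES (gen_random_uuid())")
            .await
            .expect("Failed to seed pre-migration data");
    })
}

fn after_99_0_0(client: &Client) -> BoxFuture<'_, ()> {
    Box::pin(async move {
        let rows = client
            .query("SELECT * FROM some_table", &[])
            .await
            .expect("Failed to query migrated table");
        assert!(!process_rows(&rows).is_empty());
    })
}

// ...then, inside get_migration_checks():
map.insert(
    SemverVersion(semver::Version::parse("99.0.0").unwrap()),
    DataMigrationFns { before: Some(before_99_0_0), after: after_99_0_0 },
);
```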
Fixes https://github.com/oxidecomputer/omicron/issues/4747 --- nexus/tests/integration_tests/schema.rs | 280 ++++++++++++++++++++++-- 1 file changed, 265 insertions(+), 15 deletions(-) diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 6feafe415d..21ed99e010 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -5,6 +5,7 @@ use camino::Utf8PathBuf; use chrono::{DateTime, Utc}; use dropshot::test_util::LogContext; +use futures::future::BoxFuture; use nexus_db_model::schema::SCHEMA_VERSION as LATEST_SCHEMA_VERSION; use nexus_db_queries::db::datastore::{ all_sql_for_version_migration, EARLIEST_SUPPORTED_VERSION, @@ -14,7 +15,7 @@ use omicron_common::api::external::SemverVersion; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::nexus_config::Config; use omicron_common::nexus_config::SchemaConfig; -use omicron_test_utils::dev::db::CockroachInstance; +use omicron_test_utils::dev::db::{Client, CockroachInstance}; use pretty_assertions::{assert_eq, assert_ne}; use similar_asserts; use slog::Logger; @@ -163,6 +164,18 @@ async fn query_crdb_schema_version(crdb: &CockroachInstance) -> String { version } +#[derive(PartialEq, Clone, Debug)] +struct SqlEnum { + name: String, + variant: String, +} + +impl From<(&str, &str)> for SqlEnum { + fn from((name, variant): (&str, &str)) -> Self { + Self { name: name.to_string(), variant: variant.to_string() } + } +} + // A newtype wrapper around a string, which allows us to more liberally // interpret SQL types. // @@ -170,19 +183,57 @@ async fn query_crdb_schema_version(crdb: &CockroachInstance) -> String { // the contents of the database, merely the schema and equality of contained data. #[derive(PartialEq, Clone, Debug)] enum AnySqlType { - DateTime, - String(String), Bool(bool), - Uuid(Uuid), - Int8(i64), + DateTime, + Enum(SqlEnum), Float4(f32), + Int8(i64), + Json(serde_json::value::Value), + String(String), TextArray(Vec), + Uuid(Uuid), // TODO: This isn't exhaustive, feel free to add more. // // These should only be necessary for rows where the database schema changes also choose to // populate data. } +impl From for AnySqlType { + fn from(b: bool) -> Self { + Self::Bool(b) + } +} + +impl From for AnySqlType { + fn from(value: SqlEnum) -> Self { + Self::Enum(value) + } +} + +impl From for AnySqlType { + fn from(value: f32) -> Self { + Self::Float4(value) + } +} + +impl From for AnySqlType { + fn from(value: i64) -> Self { + Self::Int8(value) + } +} + +impl From for AnySqlType { + fn from(value: String) -> Self { + Self::String(value) + } +} + +impl From for AnySqlType { + fn from(value: Uuid) -> Self { + Self::Uuid(value) + } +} + impl AnySqlType { fn as_str(&self) -> &str { match self { @@ -218,15 +269,33 @@ impl<'a> tokio_postgres::types::FromSql<'a> for AnySqlType { if f32::accepts(ty) { return Ok(AnySqlType::Float4(f32::from_sql(ty, raw)?)); } + if serde_json::value::Value::accepts(ty) { + return Ok(AnySqlType::Json(serde_json::value::Value::from_sql( + ty, raw, + )?)); + } if Vec::::accepts(ty) { return Ok(AnySqlType::TextArray(Vec::::from_sql( ty, raw, )?)); } - Err(anyhow::anyhow!( - "Cannot parse type {ty}. If you're trying to use this type in a table which is populated \ -during a schema migration, consider adding it to `AnySqlType`." 
- ).into()) + + use tokio_postgres::types::Kind; + match ty.kind() { + Kind::Enum(_) => { + Ok(AnySqlType::Enum(SqlEnum { + name: ty.name().to_string(), + variant: std::str::from_utf8(raw)?.to_string(), + })) + }, + _ => { + Err(anyhow::anyhow!( + "Cannot parse type {ty:?}. \ + If you're trying to use this type in a table which is populated \ + during a schema migration, consider adding it to `AnySqlType`." + ).into()) + } + } } fn accepts(_ty: &tokio_postgres::types::Type) -> bool { @@ -234,15 +303,19 @@ during a schema migration, consider adding it to `AnySqlType`." } } +// It's a little redunant to include the column name alongside each value, +// but it results in a prettier diff. #[derive(PartialEq, Debug)] -struct NamedSqlValue { - // It's a little redunant to include the column name alongside each value, - // but it results in a prettier diff. +struct ColumnValue { column: String, value: Option, } -impl NamedSqlValue { +impl ColumnValue { + fn new>(column: &str, value: V) -> Self { + Self { column: String::from(column), value: Some(value.into()) } + } + fn expect(&self, column: &str) -> Option<&AnySqlType> { assert_eq!(self.column, column); self.value.as_ref() @@ -252,7 +325,7 @@ impl NamedSqlValue { // A generic representation of a row of SQL data #[derive(PartialEq, Debug)] struct Row { - values: Vec, + values: Vec, } impl Row { @@ -278,7 +351,7 @@ fn process_rows(rows: &Vec) -> Vec { let mut row_result = Row::new(); for i in 0..row.len() { let column_name = row.columns()[i].name(); - row_result.values.push(NamedSqlValue { + row_result.values.push(ColumnValue { column: column_name.to_string(), value: row.get(i), }); @@ -849,6 +922,183 @@ async fn dbinit_equals_sum_of_all_up() { logctx.cleanup_successful(); } +type BeforeFn = for<'a> fn(client: &'a Client) -> BoxFuture<'a, ()>; +type AfterFn = for<'a> fn(client: &'a Client) -> BoxFuture<'a, ()>; + +// Describes the operations which we might take before and after +// migrations to check that they worked. +struct DataMigrationFns { + before: Option, + after: AfterFn, +} + +// "51F0" -> "Silo" +const SILO1: Uuid = Uuid::from_u128(0x111151F0_5c3d_4647_83b0_8f3515da7be1); +const SILO2: Uuid = Uuid::from_u128(0x222251F0_5c3d_4647_83b0_8f3515da7be1); + +// "6001" -> "Pool" +const POOL1: Uuid = Uuid::from_u128(0x11116001_5c3d_4647_83b0_8f3515da7be1); +const POOL2: Uuid = Uuid::from_u128(0x22226001_5c3d_4647_83b0_8f3515da7be1); +const POOL3: Uuid = Uuid::from_u128(0x33336001_5c3d_4647_83b0_8f3515da7be1); + +fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async move { + // Create two silos + client.batch_execute(&format!("INSERT INTO silo + (id, name, description, time_created, time_modified, time_deleted, discoverable, authentication_mode, user_provision_type, mapped_fleet_roles, rcgen) VALUES + ('{SILO1}', 'silo1', '', now(), now(), NULL, false, 'local', 'jit', '{{}}', 1), + ('{SILO2}', 'silo2', '', now(), now(), NULL, false, 'local', 'jit', '{{}}', 1); + ")).await.expect("Failed to create silo"); + + // Create an IP pool for each silo, and a third "fleet pool" which has + // no corresponding silo. 
+ client.batch_execute(&format!("INSERT INTO ip_pool + (id, name, description, time_created, time_modified, time_deleted, rcgen, silo_id, is_default) VALUES + ('{POOL1}', 'pool1', '', now(), now(), NULL, 1, '{SILO1}', true), + ('{POOL2}', 'pool2', '', now(), now(), NULL, 1, '{SILO2}', false), + ('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true); + ")).await.expect("Failed to create IP Pool"); + }) +} + +fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async { + // Confirm that the ip_pool_resource objects have been created + // by the migration. + let rows = client + .query("SELECT * FROM ip_pool_resource ORDER BY ip_pool_id", &[]) + .await + .expect("Failed to query ip pool resource"); + let ip_pool_resources = process_rows(&rows); + + assert_eq!(ip_pool_resources.len(), 4); + + let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo")); + + // pool1, which referenced silo1 in the "ip_pool" table, has a newly + // created resource. + // + // The same relationship is true for pool2 / silo2. + assert_eq!( + ip_pool_resources[0].values, + vec![ + ColumnValue::new("ip_pool_id", POOL1), + ColumnValue::new("resource_type", type_silo.clone()), + ColumnValue::new("resource_id", SILO1), + ColumnValue::new("is_default", true), + ], + ); + assert_eq!( + ip_pool_resources[1].values, + vec![ + ColumnValue::new("ip_pool_id", POOL2), + ColumnValue::new("resource_type", type_silo.clone()), + ColumnValue::new("resource_id", SILO2), + ColumnValue::new("is_default", false), + ], + ); + + // pool3 did not previously have a corresponding silo, so now it's associated + // with both silos as a new resource in each. + // + // Additionally, silo1 already had a default pool (pool1), but silo2 did + // not have one. As a result, pool3 becomes the new default pool for silo2. + assert_eq!( + ip_pool_resources[2].values, + vec![ + ColumnValue::new("ip_pool_id", POOL3), + ColumnValue::new("resource_type", type_silo.clone()), + ColumnValue::new("resource_id", SILO1), + ColumnValue::new("is_default", false), + ], + ); + assert_eq!( + ip_pool_resources[3].values, + vec![ + ColumnValue::new("ip_pool_id", POOL3), + ColumnValue::new("resource_type", type_silo.clone()), + ColumnValue::new("resource_id", SILO2), + ColumnValue::new("is_default", true), + ], + ); + }) +} + +// Lazily initializes all migration checks. The combination of Rust function +// pointers and async makes defining a static table fairly painful, so we're +// using lazy initialization instead. +// +// Each "check" is implemented as a pair of {before, after} migration function +// pointers, called precisely around the migration under test. +fn get_migration_checks() -> BTreeMap { + let mut map = BTreeMap::new(); + + map.insert( + SemverVersion(semver::Version::parse("23.0.0").unwrap()), + DataMigrationFns { before: Some(before_23_0_0), after: after_23_0_0 }, + ); + + map +} + +// Performs all schema changes and runs version-specific assertions. +// +// HOW TO ADD A MIGRATION CHECK: +// - Add a new "map.insert" line to "get_migration_checks", with the semver of +// the version you'd like to inspect before / after. +// - Define your "before" (optional) and "after" (required) functions. These +// act on a connection to CockroachDB, and can observe and mutate arbitrary +// state. +// +// ADVICE FOR MIGRATION CHECKS: +// - Your migration check will run in the same test as all other migration +// checks, because performing schema migrations isn't that fast. 
If you +// perform an operation that could be disruptive to subsequent checks, I +// recommend cleaning up after yourself (e.g., DELETE relevant rows). +// - I recommend using schema checks that are NOT strongly-typed. When you +// add a migration check, it'll happen to match the "latest" static schemas +// defined by Nexus, but that won't always be the case. As the schema +// continues to change (maybe a table you're trying to check gets a new column +// in a later version), your code should continue operating on the OLD version, +// and as such, should avoid needing any updates. +#[tokio::test] +async fn validate_data_migration() { + let config = load_test_config(); + let logctx = LogContext::new("validate_data_migration", &config.pkg.log); + let log = &logctx.log; + + let populate = false; + let mut crdb = test_setup_just_crdb(&logctx.log, populate).await; + let client = crdb.connect().await.expect("Failed to access CRDB client"); + + let all_versions = read_all_schema_versions().await; + let all_checks = get_migration_checks(); + + // Go from the first version to the latest version. + for version in &all_versions { + // If this check has preconditions (or setup), run them. + let checks = all_checks.get(version); + if let Some(before) = checks.and_then(|check| check.before) { + before(&client).await; + } + + apply_update(log, &crdb, &version.to_string(), 1).await; + assert_eq!(version.to_string(), query_crdb_schema_version(&crdb).await); + + // If this check has postconditions (or cleanup), run them. + if let Some(after) = checks.map(|check| check.after) { + after(&client).await; + } + } + assert_eq!( + LATEST_SCHEMA_VERSION.to_string(), + query_crdb_schema_version(&crdb).await + ); + + crdb.cleanup().await.unwrap(); + logctx.cleanup_successful(); +} + // Returns the InformationSchema object for a database populated via `sql`. async fn get_information_schema(log: &Logger, sql: &str) -> InformationSchema { let populate = false; From 45b665184ad6254f5b2bb969abefbf476e0803c8 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 9 Jan 2024 15:18:45 -0800 Subject: [PATCH 160/186] [ci] update nextest to 0.9.67 (#4788) This version of nextest has a fix for https://github.com/nextest-rs/nextest/issues/1208, which we encountered while attempting to diagnose #4779. --- .config/nextest.toml | 2 +- .github/buildomat/build-and-test.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 136a21a236..ba28fa0625 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,7 @@ # # The required version should be bumped up if we need new features, performance # improvements or bugfixes that are present in newer versions of nextest. -nextest-version = { required = "0.9.64", recommended = "0.9.64" } +nextest-version = { required = "0.9.64", recommended = "0.9.67" } experimental = ["setup-scripts"] diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 34f81bab68..eab64c528c 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -7,7 +7,7 @@ set -o xtrace # NOTE: This version should be in sync with the recommended version in # .config/nextest.toml. (Maybe build an automated way to pull the recommended # version in the future.) 
-NEXTEST_VERSION='0.9.64' +NEXTEST_VERSION='0.9.67' cargo --version rustc --version From 9eba46df9980aa76a5043cf0300feddc6e79202c Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 9 Jan 2024 18:27:32 -0800 Subject: [PATCH 161/186] [nexus-test-utils] set 60s timeouts for each init step (#4789) In #4779 we're tracking what appears to be a ClickHouse initialization failure during Nexus startup. Set a timeout of 60s for each step in the initialization process. These steps should usually not take more than 5 seconds each, so 60s is a really comfortable buffer. --- Cargo.lock | 2 + common/src/api/external/mod.rs | 2 +- nexus/test-interface/src/lib.rs | 4 +- nexus/test-utils/Cargo.toml | 2 + nexus/test-utils/src/lib.rs | 160 +++++++++++++++++++++++++++----- 5 files changed, 142 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3db966876d..2974dfe98e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4265,6 +4265,7 @@ dependencies = [ "dns-server", "dns-service-client", "dropshot", + "futures", "gateway-messages", "gateway-test-utils", "headers", @@ -4286,6 +4287,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "slog", + "tokio", "trust-dns-resolver", "uuid", ] diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 312d400d2f..899f15a04b 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1919,7 +1919,7 @@ impl MacAddr { /// Iterate the MAC addresses in the system address range /// (used as an allocator in contexts where collisions are not expected and /// determinism is useful, like in the test suite) - pub fn iter_system() -> impl Iterator { + pub fn iter_system() -> impl Iterator + Send { ((Self::MAX_SYSTEM_RESV + 1)..=Self::MAX_SYSTEM_ADDR) .map(Self::from_i64) } diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 2456f27684..23326a5ecb 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -38,8 +38,8 @@ use std::net::{SocketAddr, SocketAddrV6}; use uuid::Uuid; #[async_trait] -pub trait NexusServer { - type InternalServer; +pub trait NexusServer: Send + Sync + 'static { + type InternalServer: Send + Sync + 'static; async fn start_internal( config: &Config, diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 024cba958b..4a7924770e 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -14,6 +14,7 @@ crucible-agent-client.workspace = true dns-server.workspace = true dns-service-client.workspace = true dropshot.workspace = true +futures.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true headers.workspace = true @@ -34,6 +35,7 @@ serde.workspace = true serde_json.workspace = true serde_urlencoded.workspace = true slog.workspace = true +tokio.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index d2ac0405fc..c6dc9fefe9 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -14,6 +14,8 @@ use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use dropshot::HandlerTaskMode; +use futures::future::BoxFuture; +use futures::FutureExt; use gateway_test_utils::setup::GatewayTestContext; use nexus_test_interface::NexusServer; use nexus_types::external_api::params::UserId; @@ -39,7 +41,7 @@ use omicron_test_utils::dev; use oximeter_collector::Oximeter; use oximeter_producer::LogConfig; 
use oximeter_producer::Server as ProducerServer; -use slog::{debug, o, Logger}; +use slog::{debug, error, o, Logger}; use std::collections::HashMap; use std::fmt::Debug; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -158,7 +160,7 @@ struct RackInitRequestBuilder { services: Vec, datasets: Vec, internal_dns_config: internal_dns::DnsConfigBuilder, - mac_addrs: Box>, + mac_addrs: Box + Send>, } impl RackInitRequestBuilder { @@ -254,11 +256,18 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { pub external_dns_zone_name: Option, pub external_dns: Option, pub internal_dns: Option, + dns_config: Option, pub silo_name: Option, pub user_name: Option, } +type StepInitFn<'a, N> = Box< + dyn for<'b> FnOnce( + &'b mut ControlPlaneTestContextBuilder<'a, N>, + ) -> BoxFuture<'b, ()>, +>; + impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { pub fn new( test_name: &'a str, @@ -290,11 +299,37 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { external_dns_zone_name: None, external_dns: None, internal_dns: None, + dns_config: None, silo_name: None, user_name: None, } } + pub async fn init_with_steps( + &mut self, + steps: Vec<(&str, StepInitFn<'a, N>)>, + timeout: Duration, + ) { + let log = self.logctx.log.new(o!("component" => "init_with_steps")); + for (step_name, step) in steps { + debug!(log, "Running step {step_name}"); + let step_fut = step(self); + match tokio::time::timeout(timeout, step_fut).await { + Ok(()) => {} + Err(_) => { + error!( + log, + "Timed out after {timeout:?} \ + while running step {step_name}, failing test" + ); + panic!( + "Timed out after {timeout:?} while running step {step_name}", + ); + } + } + } + } + pub async fn start_crdb(&mut self, populate: bool) { let populate = if populate { PopulateCrdb::FromEnvironmentSeed @@ -581,7 +616,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { self.nexus_internal_addr = Some(nexus_internal_addr); } - pub async fn populate_internal_dns(&mut self) -> DnsConfigParams { + pub async fn populate_internal_dns(&mut self) { let log = &self.logctx.log; debug!(log, "Populating Internal DNS"); @@ -604,18 +639,21 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { dns_config_client.dns_config_put(&dns_config).await.expect( "Failed to send initial DNS records to internal DNS server", ); - dns_config + self.dns_config = Some(dns_config); } // Perform RSS handoff pub async fn start_nexus_external( &mut self, - dns_config: DnsConfigParams, tls_certificates: Vec, ) { let log = &self.logctx.log; debug!(log, "Starting Nexus (external API)"); + let dns_config = self.dns_config.clone().expect( + "populate_internal_dns must be called before start_nexus_external", + ); + // Create a recovery silo let external_dns_zone_name = internal_dns::names::DNS_ZONE_EXTERNAL_TESTING.to_string(); @@ -956,30 +994,102 @@ async fn setup_with_config_impl( sim_mode: sim::SimMode, initial_cert: Option, ) -> ControlPlaneTestContext { - builder.start_crdb_impl(populate).await; - builder.start_clickhouse().await; - builder.start_gateway().await; - builder.start_dendrite(SwitchLocation::Switch0).await; - builder.start_dendrite(SwitchLocation::Switch1).await; - builder.start_mgd(SwitchLocation::Switch0).await; - builder.start_mgd(SwitchLocation::Switch1).await; - builder.start_internal_dns().await; - builder.start_external_dns().await; - builder.start_nexus_internal().await; - builder.start_sled(sim_mode).await; - builder.start_crucible_pantry().await; - 
builder.scrimlet_dns_setup().await; - - // Give Nexus necessary information to find the Crucible Pantry - let dns_config = builder.populate_internal_dns().await; + const STEP_TIMEOUT: Duration = Duration::from_secs(60); builder - .start_nexus_external(dns_config, initial_cert.into_iter().collect()) + .init_with_steps( + vec![ + ( + "start_crdb", + Box::new(|builder| { + builder.start_crdb_impl(populate).boxed() + }), + ), + ( + "start_clickhouse", + Box::new(|builder| builder.start_clickhouse().boxed()), + ), + ( + "start_gateway", + Box::new(|builder| builder.start_gateway().boxed()), + ), + ( + "start_dendrite_switch0", + Box::new(|builder| { + builder.start_dendrite(SwitchLocation::Switch0).boxed() + }), + ), + ( + "start_dendrite_switch1", + Box::new(|builder| { + builder.start_dendrite(SwitchLocation::Switch1).boxed() + }), + ), + ( + "start_mgd_switch0", + Box::new(|builder| { + builder.start_mgd(SwitchLocation::Switch0).boxed() + }), + ), + ( + "start_mgd_switch1", + Box::new(|builder| { + builder.start_mgd(SwitchLocation::Switch1).boxed() + }), + ), + ( + "start_internal_dns", + Box::new(|builder| builder.start_internal_dns().boxed()), + ), + ( + "start_external_dns", + Box::new(|builder| builder.start_external_dns().boxed()), + ), + ( + "start_nexus_internal", + Box::new(|builder| builder.start_nexus_internal().boxed()), + ), + ( + "start_sled", + Box::new(move |builder| { + builder.start_sled(sim_mode).boxed() + }), + ), + ( + "start_crucible_pantry", + Box::new(|builder| builder.start_crucible_pantry().boxed()), + ), + ( + "scrimlet_dns_setup", + Box::new(|builder| builder.scrimlet_dns_setup().boxed()), + ), + ( + "populate_internal_dns", + Box::new(|builder| builder.populate_internal_dns().boxed()), + ), + ( + "start_nexus_external", + Box::new(|builder| { + builder + .start_nexus_external( + initial_cert.into_iter().collect(), + ) + .boxed() + }), + ), + ( + "start_oximeter", + Box::new(|builder| builder.start_oximeter().boxed()), + ), + ( + "start_producer_server", + Box::new(|builder| builder.start_producer_server().boxed()), + ), + ], + STEP_TIMEOUT, + ) .await; - builder.start_oximeter().await; - builder.start_producer_server().await; - builder.build() } From 42c0c8a32e863433370ecce8c1d8c1e86454bdbd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jan 2024 11:16:11 -0800 Subject: [PATCH 162/186] [sled agent] fail requests for non-NTP zones if time is not synchronized (#4778) Fixes https://github.com/oxidecomputer/omicron/issues/4776 --- sled-agent/src/bootstrap/pre_server.rs | 9 +- sled-agent/src/services.rs | 215 ++++++++++++++++++++++--- sled-agent/src/sled_agent.rs | 4 +- 3 files changed, 200 insertions(+), 28 deletions(-) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 02710ff583..e61e15d370 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -18,6 +18,7 @@ use crate::long_running_tasks::{ spawn_all_longrunning_tasks, LongRunningTaskHandles, }; use crate::services::ServiceManager; +use crate::services::TimeSyncConfig; use crate::sled_agent::SledAgent; use crate::storage_monitor::UnderlayAccess; use camino::Utf8PathBuf; @@ -127,12 +128,18 @@ impl BootstrapAgentStartup { let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; + let time_sync = if let Some(true) = config.skip_timesync { + TimeSyncConfig::Skip + } else { + TimeSyncConfig::Normal + }; + let service_manager = ServiceManager::new( &base_log, ddm_admin_localhost_client.clone(), 
startup_networking, sled_mode, - config.skip_timesync, + time_sync, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), long_running_task_handles.storage_manager.clone(), diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index ddfea5d596..e240fb4d03 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -202,6 +202,11 @@ pub enum Error { #[error("NTP zone not ready")] NtpZoneNotReady, + // This isn't exactly "NtpZoneNotReady" -- it can happen when the NTP zone + // is up, but time is still in the process of synchronizing. + #[error("Time not yet synchronized")] + TimeNotSynchronized, + #[error("Execution error: {0}")] ExecutionError(#[from] illumos_utils::ExecutionError), @@ -252,6 +257,9 @@ impl From for omicron_common::api::external::Error { err @ Error::RequestedConfigOutdated { .. } => { omicron_common::api::external::Error::conflict(&err.to_string()) } + err @ Error::TimeNotSynchronized => { + omicron_common::api::external::Error::unavail(&err.to_string()) + } _ => omicron_common::api::external::Error::InternalError { internal_message: err.to_string(), }, @@ -274,6 +282,27 @@ fn display_zone_init_errors(errors: &[(String, Box)]) -> String { output } +// Does this zone require time synchronization before it is initialized?" +// +// This function is somewhat conservative - the set of services +// that can be launched before timesync has completed is intentionally kept +// small, since it would be easy to add a service that expects time to be +// reasonably synchronized. +fn zone_requires_timesync(zone_type: &OmicronZoneType) -> bool { + match zone_type { + // These zones can be initialized and started before time has been + // synchronized. For the NTP zones, this should be self-evident -- + // we need the NTP zone to actually perform time synchronization! + // + // The DNS zone is a bit of an exception here, since the NTP zone + // itself may rely on DNS lookups as a dependency. + OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } + | OmicronZoneType::InternalDns { .. } => false, + _ => true, + } +} + /// Configuration parameters which modify the [`ServiceManager`]'s behavior. pub struct Config { /// Identifies the sled being configured @@ -504,18 +533,20 @@ enum SledLocalZone { }, } +type ZoneMap = BTreeMap; + /// Manages miscellaneous Sled-local services. pub struct ServiceManagerInner { log: Logger, global_zone_bootstrap_link_local_address: Ipv6Addr, switch_zone: Mutex, sled_mode: SledMode, - skip_timesync: Option, + time_sync_config: TimeSyncConfig, time_synced: AtomicBool, switch_zone_maghemite_links: Vec, sidecar_revision: SidecarRevision, // Zones representing running services - zones: Mutex>, + zones: Mutex, underlay_vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, bootstrap_vnic_allocator: VnicAllocator, @@ -540,6 +571,16 @@ struct SledAgentInfo { rack_network_config: Option, } +pub(crate) enum TimeSyncConfig { + // Waits for NTP to confirm that time has been synchronized. + Normal, + // Skips timesync unconditionally. + Skip, + // Fails timesync unconditionally. + #[cfg(test)] + Fail, +} + #[derive(Clone)] pub struct ServiceManager { inner: Arc, @@ -554,7 +595,7 @@ impl ServiceManager { /// - `bootstrap_networking`: Collection of etherstubs/VNICs set up when /// bootstrap agent begins /// - `sled_mode`: The sled's mode of operation (Gimlet vs Scrimlet). - /// - `skip_timesync`: If true, the sled always reports synced time. 
+ /// - `time_sync_config`: Describes how the sled awaits synced time. /// - `sidecar_revision`: Rev of attached sidecar, if present. /// - `switch_zone_maghemite_links`: List of physical links on which /// maghemite should listen. @@ -565,7 +606,7 @@ impl ServiceManager { ddmd_client: DdmAdminClient, bootstrap_networking: BootstrapNetworking, sled_mode: SledMode, - skip_timesync: Option, + time_sync_config: TimeSyncConfig, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, storage: StorageHandle, @@ -582,7 +623,7 @@ impl ServiceManager { // Load the switch zone if it already exists? switch_zone: Mutex::new(SledLocalZone::Disabled), sled_mode, - skip_timesync, + time_sync_config, time_synced: AtomicBool::new(false), sidecar_revision, switch_zone_maghemite_links, @@ -2767,6 +2808,26 @@ impl ServiceManager { old_zones_set.difference(&requested_zones_set); let zones_to_be_added = requested_zones_set.difference(&old_zones_set); + // For each new zone request, ensure that we've sufficiently + // synchronized time. + // + // NOTE: This imposes a constraint, during initial setup, cold boot, + // etc, that NTP and the internal DNS system it depends on MUST be + // initialized prior to other zones. + let time_is_synchronized = + match self.timesync_get_locked(&existing_zones).await { + // Time is synchronized + Ok(TimeSync { sync: true, .. }) => true, + // Time is not synchronized, or we can't check + _ => false, + }; + for zone in zones_to_be_added.clone() { + if zone_requires_timesync(&zone.zone_type) && !time_is_synchronized + { + return Err(Error::TimeNotSynchronized); + } + } + // Destroy zones that should not be running for zone in zones_to_be_removed { let expected_zone_name = zone.zone_name(); @@ -2960,8 +3021,24 @@ impl ServiceManager { pub async fn timesync_get(&self) -> Result { let existing_zones = self.inner.zones.lock().await; + self.timesync_get_locked(&existing_zones).await + } + + async fn timesync_get_locked( + &self, + existing_zones: &tokio::sync::MutexGuard<'_, ZoneMap>, + ) -> Result { + let skip_timesync = match &self.inner.time_sync_config { + TimeSyncConfig::Normal => false, + TimeSyncConfig::Skip => true, + #[cfg(test)] + TimeSyncConfig::Fail => { + info!(self.inner.log, "Configured to fail timesync checks"); + return Err(Error::TimeNotSynchronized); + } + }; - if let Some(true) = self.inner.skip_timesync { + if skip_timesync { info!(self.inner.log, "Configured to skip timesync checks"); self.boottime_rewrite(); return Ok(TimeSync { @@ -3545,7 +3622,6 @@ mod test { svc, zone::MockZones, }; - use omicron_common::address::OXIMETER_PORT; use sled_storage::disk::{RawDisk, SyntheticDisk}; use sled_storage::manager::{FakeStorageManager, StorageHandle}; @@ -3558,6 +3634,7 @@ mod test { const SWITCH_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_oximeter"; + const EXPECTED_PORT: u16 = 12223; fn make_bootstrap_networking_config() -> BootstrapNetworking { BootstrapNetworking { @@ -3575,7 +3652,9 @@ mod test { } // Returns the expectations for a new service to be created. 
- fn expect_new_service() -> Vec> { + fn expect_new_service( + expected_zone_name_prefix: &str, + ) -> Vec> { illumos_utils::USE_MOCKS.store(true, Ordering::SeqCst); // Create a VNIC let create_vnic_ctx = MockDladm::create_vnic_context(); @@ -3587,15 +3666,19 @@ mod test { ); // Install the Omicron Zone let install_ctx = MockZones::install_omicron_zone_context(); - install_ctx.expect().return_once(|_, _, name, _, _, _, _, _, _| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); - Ok(()) - }); + let prefix = expected_zone_name_prefix.to_string(); + install_ctx.expect().return_once( + move |_, _, name, _, _, _, _, _, _| { + assert!(name.starts_with(&prefix)); + Ok(()) + }, + ); // Boot the zone. let boot_ctx = MockZones::boot_context(); - boot_ctx.expect().return_once(|name| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); + let prefix = expected_zone_name_prefix.to_string(); + boot_ctx.expect().return_once(move |name| { + assert!(name.starts_with(&prefix)); Ok(()) }); @@ -3603,8 +3686,9 @@ mod test { // up the zone ID for the booted zone. This goes through // `MockZone::id` to find the zone and get its ID. let id_ctx = MockZones::id_context(); - id_ctx.expect().return_once(|name| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); + let prefix = expected_zone_name_prefix.to_string(); + id_ctx.expect().return_once(move |name| { + assert!(name.starts_with(&prefix)); Ok(Some(1)) }); @@ -3720,19 +3804,35 @@ mod test { id: Uuid, generation: Generation, ) { - let _expectations = expect_new_service(); let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); + try_new_service_of_type( + mgr, + id, + generation, + OmicronZoneType::Oximeter { address }, + ) + .await + .expect("Could not create service"); + } + + async fn try_new_service_of_type( + mgr: &ServiceManager, + id: Uuid, + generation: Generation, + zone_type: OmicronZoneType, + ) -> Result<(), Error> { + let zone_prefix = format!("oxz_{}", zone_type.zone_type_str()); + let _expectations = expect_new_service(&zone_prefix); mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation, zones: vec![OmicronZoneConfig { id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type, }], }) .await - .unwrap(); } // Prepare to call "ensure" for a service which already exists. We should @@ -3743,7 +3843,7 @@ mod test { generation: Generation, ) { let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation, zones: vec![OmicronZoneConfig { @@ -3802,6 +3902,7 @@ mod test { // files. 
std::fs::write(dir.join("oximeter.tar.gz"), "Not a real file") .unwrap(); + std::fs::write(dir.join("ntp.tar.gz"), "Not a real file").unwrap(); } } @@ -3857,13 +3958,20 @@ mod test { } fn new_service_manager(self) -> ServiceManager { + self.new_service_manager_with_timesync(TimeSyncConfig::Skip) + } + + fn new_service_manager_with_timesync( + self, + time_sync_config: TimeSyncConfig, + ) -> ServiceManager { let log = &self.log; let mgr = ServiceManager::new( log, self.ddmd_client, make_bootstrap_networking_config(), SledMode::Auto, - Some(true), + time_sync_config, SidecarRevision::Physical("rev-test".to_string()), vec![], self.storage_handle, @@ -3927,6 +4035,63 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_ensure_service_before_timesync() { + let logctx = omicron_test_utils::dev::test_setup_log( + "test_ensure_service_before_timesync", + ); + let test_config = TestConfig::new().await; + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + + let mgr = + helper.new_service_manager_with_timesync(TimeSyncConfig::Fail); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + let v1 = Generation::new(); + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, v1); + assert!(found.zones.is_empty()); + + let v2 = v1.next(); + let id = Uuid::new_v4(); + + // Should fail: time has not yet synchronized. + let address = + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); + let result = try_new_service_of_type( + &mgr, + id, + v2, + OmicronZoneType::Oximeter { address }, + ) + .await; + assert_matches::assert_matches!( + result, + Err(Error::TimeNotSynchronized) + ); + + // Should succeed: we don't care that time has not yet synchronized (for + // this specific service). + try_new_service_of_type( + &mgr, + id, + v2, + OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, + ) + .await + .unwrap(); + + drop_service_manager(mgr); + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_ensure_service_which_already_exists() { let logctx = omicron_test_utils::dev::test_setup_log( @@ -3975,7 +4140,7 @@ mod test { // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. 
- let _expectations = expect_new_service(); + let _expectations = expect_new_service(EXPECTED_ZONE_NAME_PREFIX); let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); @@ -4049,7 +4214,7 @@ mod test { let _expectations = expect_new_services(); let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); let mut zones = vec![OmicronZoneConfig { id: id1, underlay_address: Ipv6Addr::LOCALHOST, @@ -4241,7 +4406,7 @@ mod test { let _expectations = expect_new_services(); let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); let mut zones = migrated_ledger.data().clone().to_omicron_zones_config().zones; zones.push(OmicronZoneConfig { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5bc0f8d257..d094643cf9 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -159,7 +159,7 @@ impl From for omicron_common::api::external::Error { impl From for dropshot::HttpError { fn from(err: Error) -> Self { match err { - crate::sled_agent::Error::Instance(instance_manager_error) => { + Error::Instance(instance_manager_error) => { match instance_manager_error { crate::instance_manager::Error::Instance( instance_error, @@ -196,7 +196,7 @@ impl From for dropshot::HttpError { e => HttpError::for_internal_error(e.to_string()), } } - crate::sled_agent::Error::ZoneBundle(ref inner) => match inner { + Error::ZoneBundle(ref inner) => match inner { BundleError::NoStorage | BundleError::Unavailable { .. } => { HttpError::for_unavail(None, inner.to_string()) } From cae1029a5ba60b71f8b3f012bfd2affa42d9b9e9 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 13:18:42 -0800 Subject: [PATCH 163/186] Update Rust crate base64 to 0.21.6 (#4785) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2974dfe98e..33da6a7305 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,9 +405,9 @@ checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" [[package]] name = "base64" -version = "0.21.5" +version = "0.21.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "c79fed4cdb43e993fcdadc7e58a09fd0e3e649c4436fa11da71c9f1f3ee7feb9" [[package]] name = "base64ct" diff --git a/Cargo.toml b/Cargo.toml index 8553a7244b..dfd57c4db1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,7 +151,7 @@ async-trait = "0.1.77" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } -base64 = "0.21.5" +base64 = "0.21.6" bb8 = "0.8.1" bcs = "0.1.6" bincode = "1.3.3" From 4aebef093ef44d5d18d34c435d7e74c9ed713b5e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 13:19:57 -0800 Subject: [PATCH 164/186] Update Rust crate libc to 0.2.152 (#4786) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33da6a7305..a4157829af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3649,9 +3649,9 @@ checksum = 
"830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.151" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libdlpi-sys" diff --git a/Cargo.toml b/Cargo.toml index dfd57c4db1..b2d0e406da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -230,7 +230,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] } itertools = "0.12.0" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" -libc = "0.2.151" +libc = "0.2.152" linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index cff10b60ce..5688e133c0 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -62,7 +62,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.151", features = ["extra_traits"] } +libc = { version = "0.2.152", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } @@ -165,7 +165,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.151", features = ["extra_traits"] } +libc = { version = "0.2.152", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } From 0c7be4c9459b59a8d00b078985f2bc1dd41c2074 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 10 Jan 2024 16:42:54 -0500 Subject: [PATCH 165/186] Addresses for propolis instances at SLED_PREFIX + 0xFFFF (#4777) Rather than allocating instance IPs starting at `SLED_PREFIX` + `RSS_RESERVED_ADDRESSES` + 1 where the `1` is the sled-agent allocated address of the GZ, we begin allocation from a larger block: `SLED_PREFIX` + `CP_SERVICES_RESERVED_ADDRESSES`. This gives us more room for nexus to allocate control plane services. Implements #4765 --- common/src/address.rs | 3 + nexus/db-model/src/schema.rs | 2 +- nexus/db-model/src/sled.rs | 6 +- nexus/db-queries/src/db/datastore/mod.rs | 38 ++----------- nexus/src/app/sagas/instance_common.rs | 6 +- nexus/src/app/sagas/instance_migrate.rs | 4 +- nexus/src/app/sagas/instance_start.rs | 4 +- nexus/tests/integration_tests/schema.rs | 72 ++++++++++++++++++++++++ schema/crdb/24.0.0/up.sql | 3 + schema/crdb/dbinit.sql | 4 +- 10 files changed, 97 insertions(+), 45 deletions(-) create mode 100644 schema/crdb/24.0.0/up.sql diff --git a/common/src/address.rs b/common/src/address.rs index 94361a2705..78eaee0bb4 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -165,6 +165,9 @@ const GZ_ADDRESS_INDEX: usize = 2; /// The maximum number of addresses per sled reserved for RSS. pub const RSS_RESERVED_ADDRESSES: u16 = 32; +// The maximum number of addresses per sled reserved for control plane services. 
+pub const CP_SERVICES_RESERVED_ADDRESSES: u16 = 0xFFFF; + /// Wraps an [`Ipv6Network`] with a compile-time prefix length. #[derive(Debug, Clone, Copy, JsonSchema, Serialize, Hash, PartialEq, Eq)] #[schemars(rename = "Ipv6Subnet")] diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 02bdd2c349..ed819cba80 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(23, 0, 1); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(24, 0, 0); table! { disk (id) { diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 85a6b3139c..52968c27d5 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -57,7 +57,7 @@ pub struct Sled { pub ip: ipv6::Ipv6Addr, pub port: SqlU16, - /// The last IP address provided to an Oxide service on this sled + /// The last IP address provided to a propolis instance on this sled pub last_used_address: ipv6::Ipv6Addr, provision_state: SledProvisionState, @@ -183,7 +183,9 @@ impl SledUpdate { pub fn into_insertable(self) -> Sled { let last_used_address = { let mut segments = self.ip().segments(); - segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; + // We allocate the entire last segment to control plane services + segments[7] = + omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES; ipv6::Ipv6Addr::from(Ipv6Addr::from(segments)) }; Sled { diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 93486771b5..d61ff15a3d 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -275,8 +275,8 @@ impl DataStore { self.pool_connection_unauthorized().await } - /// Return the next available IPv6 address for an Oxide service running on - /// the provided sled. + /// Return the next available IPv6 address for a propolis instance running + /// on the provided sled. 
pub async fn next_ipv6_address( &self, opctx: &OpContext, @@ -1286,7 +1286,6 @@ mod test { // Test sled-specific IPv6 address allocation #[tokio::test] async fn test_sled_ipv6_address_allocation() { - use omicron_common::address::RSS_RESERVED_ADDRESSES as STATIC_IPV6_ADDRESS_OFFSET; use std::net::Ipv6Addr; let logctx = dev::test_setup_log("test_sled_ipv6_address_allocation"); @@ -1322,41 +1321,14 @@ mod test { datastore.sled_upsert(sled2).await.unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); - let expected_ip = Ipv6Addr::new( - 0xfd00, - 0x1de, - 0, - 0, - 0, - 0, - 0, - 2 + STATIC_IPV6_ADDRESS_OFFSET, - ); + let expected_ip = Ipv6Addr::new(0xfd00, 0x1de, 0, 0, 0, 0, 1, 0); assert_eq!(ip, expected_ip); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); - let expected_ip = Ipv6Addr::new( - 0xfd00, - 0x1de, - 0, - 0, - 0, - 0, - 0, - 3 + STATIC_IPV6_ADDRESS_OFFSET, - ); + let expected_ip = Ipv6Addr::new(0xfd00, 0x1de, 0, 0, 0, 0, 1, 1); assert_eq!(ip, expected_ip); let ip = datastore.next_ipv6_address(&opctx, sled2_id).await.unwrap(); - let expected_ip = Ipv6Addr::new( - 0xfd00, - 0x1df, - 0, - 0, - 0, - 0, - 0, - 2 + STATIC_IPV6_ADDRESS_OFFSET, - ); + let expected_ip = Ipv6Addr::new(0xfd00, 0x1df, 0, 0, 0, 0, 1, 0); assert_eq!(ip, expected_ip); let _ = db.cleanup().await; diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 438b92cb84..8f9197b03b 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -121,9 +121,9 @@ pub async fn destroy_vmm_record( Ok(()) } -/// Allocates a new IPv6 address for a service that will run on the supplied -/// sled. -pub(super) async fn allocate_sled_ipv6( +/// Allocates a new IPv6 address for a propolis instance that will run on the +/// supplied sled. 
+pub(super) async fn allocate_vmm_ipv6( opctx: &OpContext, datastore: &DataStore, sled_uuid: Uuid, diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 29c189efb4..1716953f04 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -7,7 +7,7 @@ use crate::app::instance::{ InstanceStateChangeError, InstanceStateChangeRequest, }; use crate::app::sagas::{ - declare_saga_actions, instance_common::allocate_sled_ipv6, + declare_saga_actions, instance_common::allocate_vmm_ipv6, }; use crate::external_api::params; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; @@ -181,7 +181,7 @@ async fn sim_allocate_propolis_ip( &sagactx, ¶ms.serialized_authn, ); - allocate_sled_ipv6( + allocate_vmm_ipv6( &opctx, sagactx.user_data().datastore(), params.migrate_params.dst_sled_id, diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 8957a838e7..9d12bd8031 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -7,7 +7,7 @@ use std::net::Ipv6Addr; use super::{ - instance_common::allocate_sled_ipv6, NexusActionContext, NexusSaga, + instance_common::allocate_vmm_ipv6, NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, }; use crate::app::instance::InstanceStateChangeError; @@ -159,7 +159,7 @@ async fn sis_alloc_propolis_ip( ¶ms.serialized_authn, ); let sled_uuid = sagactx.lookup::("sled_id")?; - allocate_sled_ipv6(&opctx, sagactx.user_data().datastore(), sled_uuid).await + allocate_vmm_ipv6(&opctx, sagactx.user_data().datastore(), sled_uuid).await } async fn sis_create_vmm_record( diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 21ed99e010..f183b53282 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -20,6 +20,7 @@ use pretty_assertions::{assert_eq, assert_ne}; use similar_asserts; use slog::Logger; use std::collections::{BTreeMap, BTreeSet}; +use std::net::IpAddr; use std::path::PathBuf; use tokio::time::timeout; use tokio::time::Duration; @@ -192,6 +193,7 @@ enum AnySqlType { String(String), TextArray(Vec), Uuid(Uuid), + Inet(IpAddr), // TODO: This isn't exhaustive, feel free to add more. 
// // These should only be necessary for rows where the database schema changes also choose to @@ -234,6 +236,12 @@ impl From for AnySqlType { } } +impl From for AnySqlType { + fn from(value: IpAddr) -> Self { + Self::Inet(value) + } +} + impl AnySqlType { fn as_str(&self) -> &str { match self { @@ -279,6 +287,9 @@ impl<'a> tokio_postgres::types::FromSql<'a> for AnySqlType { ty, raw, )?)); } + if IpAddr::accepts(ty) { + return Ok(AnySqlType::Inet(IpAddr::from_sql(ty, raw)?)); + } use tokio_postgres::types::Kind; match ty.kind() { @@ -941,6 +952,13 @@ const POOL1: Uuid = Uuid::from_u128(0x11116001_5c3d_4647_83b0_8f3515da7be1); const POOL2: Uuid = Uuid::from_u128(0x22226001_5c3d_4647_83b0_8f3515da7be1); const POOL3: Uuid = Uuid::from_u128(0x33336001_5c3d_4647_83b0_8f3515da7be1); +// "513D" -> "Sled" +const SLED1: Uuid = Uuid::from_u128(0x1111513d_5c3d_4647_83b0_8f3515da7be1); +const SLED2: Uuid = Uuid::from_u128(0x2222513d_5c3d_4647_83b0_8f3515da7be1); + +// "7AC4" -> "Rack" +const RACK1: Uuid = Uuid::from_u128(0x11117ac4_5c3d_4647_83b0_8f3515da7be1); + fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> { Box::pin(async move { // Create two silos @@ -1024,6 +1042,56 @@ fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> { }) } +fn before_24_0_0(client: &Client) -> BoxFuture<'_, ()> { + // IP addresses were pulled off dogfood sled 16 + Box::pin(async move { + // Create two sleds + client + .batch_execute(&format!( + "INSERT INTO sled + (id, time_created, time_modified, time_deleted, rcgen, rack_id, + is_scrimlet, serial_number, part_number, revision, + usable_hardware_threads, usable_physical_ram, reservoir_size, ip, + port, last_used_address, provision_state) VALUES + + ('{SLED1}', now(), now(), NULL, 1, '{RACK1}', true, 'abcd', 'defg', + '1', 64, 12345678, 77, 'fd00:1122:3344:104::1', 12345, + 'fd00:1122:3344:104::1ac', 'provisionable'), + ('{SLED2}', now(), now(), NULL, 1, '{RACK1}', false, 'zzzz', 'xxxx', + '2', 64, 12345678, 77,'fd00:1122:3344:107::1', 12345, + 'fd00:1122:3344:107::d4', 'provisionable'); + " + )) + .await + .expect("Failed to create sleds"); + }) +} + +fn after_24_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async { + // Confirm that the IP Addresses have the last 2 bytes changed to `0xFFFF` + let rows = client + .query("SELECT last_used_address FROM sled ORDER BY id", &[]) + .await + .expect("Failed to sled last_used_address"); + let last_used_addresses = process_rows(&rows); + + let expected_addr_1: IpAddr = + "fd00:1122:3344:104::ffff".parse().unwrap(); + let expected_addr_2: IpAddr = + "fd00:1122:3344:107::ffff".parse().unwrap(); + + assert_eq!( + last_used_addresses[0].values, + vec![ColumnValue::new("last_used_address", expected_addr_1)] + ); + assert_eq!( + last_used_addresses[1].values, + vec![ColumnValue::new("last_used_address", expected_addr_2)] + ); + }) +} + // Lazily initializes all migration checks. The combination of Rust function // pointers and async makes defining a static table fairly painful, so we're // using lazy initialization instead. 
@@ -1037,6 +1105,10 @@ fn get_migration_checks() -> BTreeMap { SemverVersion(semver::Version::parse("23.0.0").unwrap()), DataMigrationFns { before: Some(before_23_0_0), after: after_23_0_0 }, ); + map.insert( + SemverVersion(semver::Version::parse("24.0.0").unwrap()), + DataMigrationFns { before: Some(before_24_0_0), after: after_24_0_0 }, + ); map } diff --git a/schema/crdb/24.0.0/up.sql b/schema/crdb/24.0.0/up.sql new file mode 100644 index 0000000000..91bd10ab9f --- /dev/null +++ b/schema/crdb/24.0.0/up.sql @@ -0,0 +1,3 @@ +UPDATE omicron.public.sled + SET last_used_address = (netmask(set_masklen(ip, 64)) & ip) + 0xFFFF + WHERE time_deleted is null; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index e40c97972f..2105caabef 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -108,7 +108,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, - /* The last address allocated to an Oxide service on this sled. */ + /* The last address allocated to a propolis instance on this sled. */ last_used_address INET NOT NULL, /* The state of whether resources should be provisioned onto the sled */ @@ -3258,7 +3258,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '23.0.1', NULL) + ( TRUE, NOW(), NOW(), '24.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 05284564f82d835c06ad9ee822a498a99ab08fe7 Mon Sep 17 00:00:00 2001 From: Michael Zeller Date: Wed, 10 Jan 2024 19:08:21 -0500 Subject: [PATCH 166/186] The control plane should use libipcc (#4536) We should use libipcc in omicron rather than make direct calls via `ioctl`. The library will take care of hiding the implementation details from upstream consumers -- this becomes important in the future when communication with the service processor from the host OS physically changes with newer board design. Currently the only consumer in this repo is installinator but the control plane is about to start communicating with the RoT via IPCC as well. This PR introduces new bindings around `libipcc` and removes the old `ioctl` interfaces. 
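For consumers of the crate the call sites barely change; here is a minimal
sketch of the installinator path after this PR (condensed from the
`artifact.rs` hunk below, reusing the `anyhow` context strings from that
hunk rather than introducing anything new):

    use anyhow::Context;
    use ipcc::{InstallinatorImageId, Ipcc};

    fn image_id_from_ipcc() -> anyhow::Result<InstallinatorImageId> {
        // Ipcc::new() replaces the old Ipcc::open(); on illumos it now goes
        // through a libipcc handle instead of ioctl(2) on /dev/ipcc.
        let ipcc = Ipcc::new().context("error opening IPCC")?;
        ipcc.installinator_image_id()
            .context("error retrieving installinator image ID")
    }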
--------- Co-authored-by: Andy Fiddaman --- .cargo/config | 45 ++++++- .github/buildomat/build-and-test.sh | 9 ++ Cargo.lock | 7 +- Cargo.toml | 6 +- gateway/Cargo.toml | 2 +- gateway/src/http_entrypoints.rs | 13 +- gateway/src/http_entrypoints/conversions.rs | 2 +- installinator/Cargo.toml | 2 +- installinator/src/artifact.rs | 4 +- ipcc-key-value/src/ioctl.rs | 126 ------------------- ipcc-key-value/src/ioctl_common.rs | 57 --------- ipcc-key-value/src/ioctl_stub.rs | 32 ----- {ipcc-key-value => ipcc}/Cargo.toml | 3 +- ipcc/build.rs | 16 +++ ipcc/src/ffi.rs | 83 +++++++++++++ ipcc/src/handle.rs | 129 ++++++++++++++++++++ ipcc/src/handle_stub.rs | 25 ++++ {ipcc-key-value => ipcc}/src/lib.rs | 116 +++++++++++++++--- openapi/gateway.json | 2 +- 19 files changed, 420 insertions(+), 259 deletions(-) delete mode 100644 ipcc-key-value/src/ioctl.rs delete mode 100644 ipcc-key-value/src/ioctl_common.rs delete mode 100644 ipcc-key-value/src/ioctl_stub.rs rename {ipcc-key-value => ipcc}/Cargo.toml (91%) create mode 100644 ipcc/build.rs create mode 100644 ipcc/src/ffi.rs create mode 100644 ipcc/src/handle.rs create mode 100644 ipcc/src/handle_stub.rs rename {ipcc-key-value => ipcc}/src/lib.rs (75%) diff --git a/.cargo/config b/.cargo/config index 6794e988ad..f658f146c9 100644 --- a/.cargo/config +++ b/.cargo/config @@ -7,13 +7,50 @@ [build] rustdocflags = "--document-private-items" -# On illumos, use `-znocompstrtab` to reduce link time. +# On illumos, use `-znocompstrtab` to reduce link time. We also add the Oxide +# specific platform directory to the RPATH where additional libraries can be +# found such as libipcc. # -# Note that these flags are overridden by a user's environment variable, so -# things critical to correctness probably don't belong here. +# Our reasoning for including `-R/usr/platform/oxide/lib/amd64` here: +# - Oxide specific features - This path contains Oxide specific libraries such +# as libipcc and will likely grow over time to include more functionality. +# - Avoid the rpaths crate - The rpaths crate was built to deal with runtime +# paths that are dynamic such as with libraries like libpq which can live in +# different locations based on OS. This path will only ever be found on +# illumos and will be tied directly to the Oxide platform. +# - Less developer burden - Having something like the ipcc crate emit +# `DEP_IPCC_LIBDIRS` means that we end up littering the repo with Cargo.toml +# and build.rs changes whenever ipcc shows up somewhere in the dependency +# tree. While initially exploring that path we ran into a significant number +# of tests failing due to not being able to find libipcc in the runtime path +# which can be confusing or surprising to someone doing work on omicron. +# +# We could also update Helios so that a symlink is created from +# /usr/platform/oxide/lib/amd64 into /usr/lib/64 but we opted to not take +# this route forward as it meant waiting for another round of updates on +# shared build machines and to buildomat itself. +# +# As documented at: +# https://doc.rust-lang.org/cargo/reference/config.html#buildrustflags +# +# There are four mutually exclusive sources of extra flags. They are checked in +# order, with the first one being used: +# 1. `CARGO_ENCODED_RUSTFLAGS` environment variable. +# 2. `RUSTFLAGS` environment variable. +# 3. All matching target..rustflags and target..rustflags config +# entries joined together. +# 4. build.rustflags config value. 
+# +# When overriding the defaults in this config by environment variable the user +# may need to manually pass the additional options found below. +# +# Note that other runtime paths should not be added here, but should instead +# reuse the infrastructure found in the `rpaths` crate which can be found in +# this repo. Those paths are usually platform specific and will vary based on a +# variety of things such as host OS. [target.x86_64-unknown-illumos] rustflags = [ - "-C", "link-arg=-Wl,-znocompstrtab" + "-C", "link-arg=-Wl,-znocompstrtab,-R/usr/platform/oxide/lib/amd64" ] # Set up `cargo xtask`. diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index eab64c528c..5cf086b1a3 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -4,6 +4,8 @@ set -o errexit set -o pipefail set -o xtrace +target_os=$1 + # NOTE: This version should be in sync with the recommended version in # .config/nextest.toml. (Maybe build an automated way to pull the recommended # version in the future.) @@ -48,6 +50,13 @@ ptime -m bash ./tools/install_builder_prerequisites.sh -y # banner build export RUSTFLAGS="-D warnings" +# When running on illumos we need to pass an additional runpath that is +# usually configured via ".cargo/config" but the `RUSTFLAGS` env variable +# takes precedence. This path contains oxide specific libraries such as +# libipcc. +if [[ $target_os == "illumos" ]]; then + RUSTFLAGS="-D warnings -C link-arg=-R/usr/platform/oxide/lib/amd64" +fi export RUSTDOCFLAGS="-D warnings" export TMPDIR=$TEST_TMPDIR export RUST_BACKTRACE=1 diff --git a/Cargo.lock b/Cargo.lock index a4157829af..7491f30dde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3306,7 +3306,7 @@ dependencies = [ "illumos-utils", "installinator-artifact-client", "installinator-common", - "ipcc-key-value", + "ipcc", "itertools 0.12.0", "libc", "omicron-common", @@ -3459,9 +3459,10 @@ dependencies = [ ] [[package]] -name = "ipcc-key-value" +name = "ipcc" version = "0.1.0" dependencies = [ + "cfg-if", "ciborium", "libc", "omicron-common", @@ -4712,7 +4713,7 @@ dependencies = [ "http", "hyper", "illumos-utils", - "ipcc-key-value", + "ipcc", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", diff --git a/Cargo.toml b/Cargo.toml index b2d0e406da..fbef04d3c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ members = [ "installinator", "internal-dns-cli", "internal-dns", - "ipcc-key-value", + "ipcc", "key-manager", "nexus", "nexus/authz-macros", @@ -105,7 +105,7 @@ default-members = [ "installinator", "internal-dns-cli", "internal-dns", - "ipcc-key-value", + "ipcc", "key-manager", "nexus", "nexus/authz-macros", @@ -225,7 +225,7 @@ installinator-artifactd = { path = "installinator-artifactd" } installinator-artifact-client = { path = "clients/installinator-artifact-client" } installinator-common = { path = "installinator-common" } internal-dns = { path = "internal-dns" } -ipcc-key-value = { path = "ipcc-key-value" } +ipcc = { path = "ipcc" } ipnetwork = { version = "0.20", features = ["schemars"] } itertools = "0.12.0" key-manager = { path = "key-manager" } diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index f2e5f83a8a..450c4b445e 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -17,7 +17,7 @@ hex.workspace = true http.workspace = true hyper.workspace = true illumos-utils.workspace = true -ipcc-key-value.workspace = true +ipcc.workspace = true omicron-common.workspace = true once_cell.workspace = true schemars.workspace = true diff 
--git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index e33e8dd4a6..b5a765a8a8 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -443,11 +443,11 @@ pub struct ImageVersion { pub version: u32, } -// This type is a duplicate of the type in `ipcc-key-value`, and we provide a +// This type is a duplicate of the type in `ipcc`, and we provide a // `From<_>` impl to convert to it. We keep these types distinct to allow us to // choose different representations for MGS's HTTP API (this type) and the wire // format passed through the SP to installinator -// (`ipcc_key_value::InstallinatorImageId`), although _currently_ they happen to +// (`ipcc::InstallinatorImageId`), although _currently_ they happen to // be defined identically. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, @@ -1292,7 +1292,7 @@ async fn sp_power_state_set( /// Set the installinator image ID the sled should use for recovery. /// -/// This value can be read by the host via IPCC; see the `ipcc-key-value` crate. +/// This value can be read by the host via IPCC; see the `ipcc` crate. #[endpoint { method = PUT, path = "/sp/{type}/{slot}/ipcc/installinator-image-id", @@ -1302,14 +1302,13 @@ async fn sp_installinator_image_id_set( path: Path, body: TypedBody, ) -> Result { - use ipcc_key_value::Key; + use ipcc::Key; let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); let sp = apictx.mgmt_switch.sp(sp_id)?; - let image_id = - ipcc_key_value::InstallinatorImageId::from(body.into_inner()); + let image_id = ipcc::InstallinatorImageId::from(body.into_inner()); sp.set_ipcc_key_lookup_value( Key::InstallinatorImageId as u8, @@ -1330,7 +1329,7 @@ async fn sp_installinator_image_id_delete( rqctx: RequestContext>, path: Path, ) -> Result { - use ipcc_key_value::Key; + use ipcc::Key; let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); diff --git a/gateway/src/http_entrypoints/conversions.rs b/gateway/src/http_entrypoints/conversions.rs index 1182163bcc..a4aef7425e 100644 --- a/gateway/src/http_entrypoints/conversions.rs +++ b/gateway/src/http_entrypoints/conversions.rs @@ -397,7 +397,7 @@ impl From for HostStartupOptions { } } -impl From for ipcc_key_value::InstallinatorImageId { +impl From for ipcc::InstallinatorImageId { fn from(id: InstallinatorImageId) -> Self { Self { update_id: id.update_id, diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index d489e73ec1..43966d1202 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -20,7 +20,7 @@ http.workspace = true illumos-utils.workspace = true installinator-artifact-client.workspace = true installinator-common.workspace = true -ipcc-key-value.workspace = true +ipcc.workspace = true itertools.workspace = true libc.workspace = true omicron-common.workspace = true diff --git a/installinator/src/artifact.rs b/installinator/src/artifact.rs index f74d7b7f06..734759a2c2 100644 --- a/installinator/src/artifact.rs +++ b/installinator/src/artifact.rs @@ -9,7 +9,7 @@ use clap::Args; use futures::StreamExt; use installinator_artifact_client::ClientError; use installinator_common::EventReport; -use ipcc_key_value::{InstallinatorImageId, Ipcc}; +use ipcc::{InstallinatorImageId, Ipcc}; use omicron_common::update::{ArtifactHash, ArtifactHashId}; use tokio::sync::mpsc; use uuid::Uuid; @@ -47,7 +47,7 @@ pub(crate) struct ArtifactIdOpts { impl ArtifactIdOpts { pub(crate) fn resolve(&self) -> Result { if self.from_ipcc { - let ipcc = 
Ipcc::open().context("error opening IPCC")?; + let ipcc = Ipcc::new().context("error opening IPCC")?; ipcc.installinator_image_id() .context("error retrieving installinator image ID") } else { diff --git a/ipcc-key-value/src/ioctl.rs b/ipcc-key-value/src/ioctl.rs deleted file mode 100644 index b0524e973f..0000000000 --- a/ipcc-key-value/src/ioctl.rs +++ /dev/null @@ -1,126 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -//! IPCC `ioctl` interface. -//! -//! This module is tightly-coupled to the host OS image, and should only be used -//! by services bundled with it (e.g., sled-agent and installinator). - -use crate::InstallinatorImageId; -use crate::InstallinatorImageIdError; -use crate::IpccKey; -use crate::IpccKeyLookupError; -use crate::PingError; -use crate::Pong; -use libc::c_int; -use std::fs::File; -use std::io; -use std::os::unix::prelude::AsRawFd; - -pub struct Ipcc { - file: File, -} - -impl Ipcc { - pub fn open() -> io::Result { - let file = File::options().read(true).write(true).open(IPCC_DEV)?; - Ok(Self { file }) - } - - pub fn ping(&self) -> Result { - const EXPECTED_REPLY: &[u8] = b"pong"; - - let mut buf = [0; EXPECTED_REPLY.len()]; - let n = self.key_lookup(IpccKey::Ping, &mut buf)?; - let buf = &buf[..n]; - - if buf == EXPECTED_REPLY { - Ok(Pong) - } else { - Err(PingError::UnexpectedReply(buf.to_vec())) - } - } - - pub fn installinator_image_id( - &self, - ) -> Result { - let mut buf = [0; InstallinatorImageId::CBOR_SERIALIZED_SIZE]; - let n = self.key_lookup(IpccKey::InstallinatorImageId, &mut buf)?; - let id = InstallinatorImageId::deserialize(&buf[..n]) - .map_err(InstallinatorImageIdError::DeserializationFailed)?; - Ok(id) - } - - fn key_lookup( - &self, - key: IpccKey, - buf: &mut [u8], - ) -> Result { - let mut kl = IpccKeyLookup { - key: key as u8, - buflen: u16::try_from(buf.len()).unwrap_or(u16::MAX), - result: 0, - datalen: 0, - buf: buf.as_mut_ptr(), - }; - - let result = unsafe { - libc::ioctl( - self.file.as_raw_fd(), - IPCC_KEYLOOKUP, - &mut kl as *mut IpccKeyLookup, - ) - }; - - if result != 0 { - let error = io::Error::last_os_error(); - return Err(IpccKeyLookupError::IoctlFailed { key, error }); - } - - match kl.result { - IPCC_KEYLOOKUP_SUCCESS => Ok(usize::from(kl.datalen)), - IPCC_KEYLOOKUP_UNKNOWN_KEY => { - Err(IpccKeyLookupError::UnknownKey { key }) - } - IPCC_KEYLOOKUP_NO_VALUE => { - Err(IpccKeyLookupError::NoValueForKey { key }) - } - IPCC_KEYLOOKUP_BUFFER_TOO_SMALL => { - Err(IpccKeyLookupError::BufferTooSmallForValue { key }) - } - _ => Err(IpccKeyLookupError::UnknownResultValue { - key, - result: kl.result, - }), - } - } -} - -// -------------------------------------------------------------------- -// Constants and structures from stlouis `usr/src/uts/oxide/sys/ipcc.h` -// -------------------------------------------------------------------- - -const IPCC_DEV: &str = "/dev/ipcc"; - -const IPCC_IOC: c_int = - ((b'i' as c_int) << 24) | ((b'c' as c_int) << 16) | ((b'c' as c_int) << 8); - -const IPCC_KEYLOOKUP: c_int = IPCC_IOC | 4; - -const IPCC_KEYLOOKUP_SUCCESS: u8 = 0; -const IPCC_KEYLOOKUP_UNKNOWN_KEY: u8 = 1; -const IPCC_KEYLOOKUP_NO_VALUE: u8 = 2; -const IPCC_KEYLOOKUP_BUFFER_TOO_SMALL: u8 = 3; - -#[derive(Debug, Clone, Copy)] -#[repr(C)] -struct IpccKeyLookup { - key: u8, - buflen: u16, - result: u8, - datalen: u16, - buf: *mut 
u8, -} diff --git a/ipcc-key-value/src/ioctl_common.rs b/ipcc-key-value/src/ioctl_common.rs deleted file mode 100644 index 670cc7bff2..0000000000 --- a/ipcc-key-value/src/ioctl_common.rs +++ /dev/null @@ -1,57 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -//! This module contains types shared between the real (illumos-only) -//! `crate::ioctl` module and the generic `crate::ioctl_stub` module. - -use std::io; -use thiserror::Error; - -/// IPCC keys; the source of truth for these is RFD 316 + the -/// `host-sp-messages` crate in hubris. -#[derive(Debug, Clone, Copy)] -#[repr(u8)] -pub enum IpccKey { - Ping = 0, - InstallinatorImageId = 1, -} - -#[derive(Debug, Error)] -pub enum IpccKeyLookupError { - #[error("IPCC key lookup ioctl failed for key {key:?}: {error}")] - IoctlFailed { key: IpccKey, error: io::Error }, - #[error("IPCC key lookup failed for key {key:?}: unknown key")] - UnknownKey { key: IpccKey }, - #[error("IPCC key lookup failed for key {key:?}: no value for key")] - NoValueForKey { key: IpccKey }, - #[error( - "IPCC key lookup failed for key {key:?}: buffer too small for value" - )] - BufferTooSmallForValue { key: IpccKey }, - #[error( - "IPCC key lookup failed for key {key:?}: unknown result value {result}" - )] - UnknownResultValue { key: IpccKey, result: u8 }, -} - -#[derive(Debug, Error)] -pub enum InstallinatorImageIdError { - #[error(transparent)] - IpccKeyLookupError(#[from] IpccKeyLookupError), - #[error("deserializing installinator image ID failed: {0}")] - DeserializationFailed(String), -} - -#[derive(Debug, Error)] -pub enum PingError { - #[error(transparent)] - IpccKeyLookupError(#[from] IpccKeyLookupError), - #[error("unexpected reply from SP (expected `pong`: {0:?})")] - UnexpectedReply(Vec), -} - -#[derive(Debug, Clone, Copy)] -pub struct Pong; diff --git a/ipcc-key-value/src/ioctl_stub.rs b/ipcc-key-value/src/ioctl_stub.rs deleted file mode 100644 index cbf54b3eb4..0000000000 --- a/ipcc-key-value/src/ioctl_stub.rs +++ /dev/null @@ -1,32 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -//! Stub definition of the `Ipcc` type for compiling (but not running) on -//! non-Oxide systems. 
- -use crate::InstallinatorImageId; -use crate::InstallinatorImageIdError; -use crate::PingError; -use crate::Pong; -use std::io; - -pub struct Ipcc {} - -impl Ipcc { - pub fn open() -> io::Result { - panic!("ipcc unavailable on this platform") - } - - pub fn ping(&self) -> Result { - panic!("ipcc unavailable on this platform") - } - - pub fn installinator_image_id( - &self, - ) -> Result { - panic!("ipcc unavailable on this platform") - } -} diff --git a/ipcc-key-value/Cargo.toml b/ipcc/Cargo.toml similarity index 91% rename from ipcc-key-value/Cargo.toml rename to ipcc/Cargo.toml index 04aea9f939..98a781ab86 100644 --- a/ipcc-key-value/Cargo.toml +++ b/ipcc/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "ipcc-key-value" +name = "ipcc" version = "0.1.0" edition = "2021" license = "MPL-2.0" @@ -12,6 +12,7 @@ serde.workspace = true thiserror.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +cfg-if.workspace = true [dev-dependencies] omicron-common = { workspace = true, features = ["testing"] } diff --git a/ipcc/build.rs b/ipcc/build.rs new file mode 100644 index 0000000000..a64133dac2 --- /dev/null +++ b/ipcc/build.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +/// This path is where Oxide specific libraries live on helios systems. +#[cfg(target_os = "illumos")] +static OXIDE_PLATFORM: &str = "/usr/platform/oxide/lib/amd64/"; + +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + #[cfg(target_os = "illumos")] + { + println!("cargo:rustc-link-arg=-Wl,-R{}", OXIDE_PLATFORM); + println!("cargo:rustc-link-search={}", OXIDE_PLATFORM); + } +} diff --git a/ipcc/src/ffi.rs b/ipcc/src/ffi.rs new file mode 100644 index 0000000000..420c1ddcde --- /dev/null +++ b/ipcc/src/ffi.rs @@ -0,0 +1,83 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2023 Oxide Computer Company + +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +use std::ffi::{c_char, c_int, c_uint}; + +/// Opaque libipcc handle +#[repr(C)] +pub(crate) struct libipcc_handle_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +/// Indicates that there was no error. Used as the initialized value when +/// calling into libipcc. +pub(crate) const LIBIPCC_ERR_OK: libipcc_err_t = 0; + +/// Indicates that there was a memory allocation error. The system error +/// contains the specific errno. +pub(crate) const LIBIPCC_ERR_NO_MEM: libipcc_err_t = 1; + +/// One of the function parameters does not pass validation. There will be more +/// detail available via libipcc_errmsg(). +pub(crate) const LIBIPCC_ERR_INVALID_PARAM: libipcc_err_t = 2; + +/// An internal error occurred. There will be more detail available via +/// libipcc_errmsg() and libipcc_syserr(). +pub(crate) const LIBIPCC_ERR_INTERNAL: libipcc_err_t = 3; + +/// The requested lookup key was not known to the SP. +pub(crate) const LIBIPCC_ERR_KEY_UNKNOWN: libipcc_err_t = 4; + +/// The value for the requested lookup key was too large for the +/// supplied buffer. +pub(crate) const LIBIPCC_ERR_KEY_BUFTOOSMALL: libipcc_err_t = 5; + +/// An attempt to write to a key failed because the key is read-only. 
+pub(crate) const LIBIPCC_ERR_KEY_READONLY: libipcc_err_t = 6; + +/// An attempt to write to a key failed because the passed value is too +/// long. +pub(crate) const LIBIPCC_ERR_KEY_VALTOOLONG: libipcc_err_t = 7; + +/// Compression or decompression failed. If appropriate, libipcc_syserr() will +/// return the Z_ error from zlib. +pub(crate) const LIBIPCC_ERR_KEY_ZERR: libipcc_err_t = 8; +pub(crate) type libipcc_err_t = c_uint; + +/// Maxium length of an error message retrieved by libipcc_errmsg(). +pub(crate) const LIBIPCC_ERR_LEN: usize = 1024; + +/// Flags that can be passed to libipcc when looking up a key. Today this is +/// used for looking up a compressed key, however nothing in the public API of +/// this crate takes advantage of this. +pub(crate) type libipcc_key_flag_t = ::std::os::raw::c_uint; + +#[link(name = "ipcc")] +extern "C" { + pub(crate) fn libipcc_init( + lihp: *mut *mut libipcc_handle_t, + libipcc_errp: *mut libipcc_err_t, + syserrp: *mut c_int, + errmsg: *const c_char, + errlen: usize, + ) -> bool; + pub(crate) fn libipcc_fini(lih: *mut libipcc_handle_t); + pub(crate) fn libipcc_err(lih: *mut libipcc_handle_t) -> libipcc_err_t; + pub(crate) fn libipcc_syserr(lih: *mut libipcc_handle_t) -> c_int; + pub(crate) fn libipcc_errmsg(lih: *mut libipcc_handle_t) -> *const c_char; + pub(crate) fn libipcc_keylookup( + lih: *mut libipcc_handle_t, + key: u8, + bufp: *mut *mut u8, + lenp: *mut usize, + flags: libipcc_key_flag_t, + ) -> bool; +} diff --git a/ipcc/src/handle.rs b/ipcc/src/handle.rs new file mode 100644 index 0000000000..91b71a6ce3 --- /dev/null +++ b/ipcc/src/handle.rs @@ -0,0 +1,129 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2023 Oxide Computer Company + +use std::{ + ffi::{c_int, CStr, CString}, + ptr, +}; + +use crate::IpccError; +use crate::{ffi::*, IpccErrorInner}; + +pub struct IpccHandle(*mut libipcc_handle_t); + +impl Drop for IpccHandle { + fn drop(&mut self) { + unsafe { + libipcc_fini(self.0); + } + } +} +fn ipcc_fatal_error>( + context: C, + lerr: libipcc_err_t, + syserr: c_int, + errmsg: CString, +) -> IpccError { + let context = context.into(); + let syserr = if syserr == 0 { + "no system errno".to_string() + } else { + std::io::Error::from_raw_os_error(syserr).to_string() + }; + let inner = IpccErrorInner { + context, + errmsg: errmsg.to_string_lossy().into_owned(), + syserr, + }; + match lerr { + LIBIPCC_ERR_OK => panic!("called fatal on LIBIPCC_ERR_OK"), + LIBIPCC_ERR_NO_MEM => IpccError::NoMem(inner), + LIBIPCC_ERR_INVALID_PARAM => IpccError::InvalidParam(inner), + LIBIPCC_ERR_INTERNAL => IpccError::Internal(inner), + LIBIPCC_ERR_KEY_UNKNOWN => IpccError::KeyUnknown(inner), + LIBIPCC_ERR_KEY_BUFTOOSMALL => IpccError::KeyBufTooSmall(inner), + LIBIPCC_ERR_KEY_READONLY => IpccError::KeyReadonly(inner), + LIBIPCC_ERR_KEY_VALTOOLONG => IpccError::KeyValTooLong(inner), + LIBIPCC_ERR_KEY_ZERR => IpccError::KeyZerr(inner), + _ => IpccError::UnknownErr(inner), + } +} + +impl IpccHandle { + pub fn new() -> Result { + let mut ipcc_handle: *mut libipcc_handle_t = ptr::null_mut(); + // We subtract 1 from the length of the inital vector since CString::new + // will append a nul for us. + // Safety: Unwrapped because we guarantee that the supplied bytes + // contain no 0 bytes up front. 
+ let errmsg = CString::new(vec![1; LIBIPCC_ERR_LEN - 1]).unwrap(); + let errmsg_len = errmsg.as_bytes().len(); + let errmsg_ptr = errmsg.into_raw(); + let mut lerr = LIBIPCC_ERR_OK; + let mut syserr = 0; + if !unsafe { + libipcc_init( + &mut ipcc_handle, + &mut lerr, + &mut syserr, + errmsg_ptr, + errmsg_len, + ) + } { + // Safety: CString::from_raw retakes ownership of a CString + // transferred to C via CString::into_raw. We are calling into_raw() + // above so it is safe to turn this back into it's owned variant. + let errmsg = unsafe { CString::from_raw(errmsg_ptr) }; + return Err(ipcc_fatal_error( + "Could not init libipcc handle", + lerr, + syserr, + errmsg, + )); + } + + Ok(IpccHandle(ipcc_handle)) + } + + fn fatal>(&self, context: C) -> IpccError { + let lerr = unsafe { libipcc_err(self.0) }; + let errmsg = unsafe { libipcc_errmsg(self.0) }; + // Safety: CStr::from_ptr is documented as safe if: + // 1. The pointer contains a valid null terminator at the end of + // the string + // 2. The pointer is valid for reads of bytes up to and including + // the null terminator + // 3. The memory referenced by the return CStr is not mutated for + // the duration of lifetime 'a + // + // (1) is true because this crate initializes space for an error message + // via CString::new which adds a terminator on our behalf. + // (2) should be guaranteed by libipcc itself since it is writing error + // messages into the CString backed buffer that we gave it. + // (3) We aren't currently mutating the memory referenced by the + // CStr, and we are creating an owned copy of the data immediately so + // that it can outlive the lifetime of the libipcc handle if needed. + let errmsg = unsafe { CStr::from_ptr(errmsg) }.to_owned(); + let syserr = unsafe { libipcc_syserr(self.0) }; + ipcc_fatal_error(context, lerr, syserr, errmsg) + } + + pub(crate) fn key_lookup( + &self, + key: u8, + buf: &mut [u8], + ) -> Result { + let mut lenp = buf.len(); + + if !unsafe { + libipcc_keylookup(self.0, key, &mut buf.as_mut_ptr(), &mut lenp, 0) + } { + return Err(self.fatal(format!("lookup of key {key} failed"))); + } + + Ok(lenp) + } +} diff --git a/ipcc/src/handle_stub.rs b/ipcc/src/handle_stub.rs new file mode 100644 index 0000000000..bc4b84b7fe --- /dev/null +++ b/ipcc/src/handle_stub.rs @@ -0,0 +1,25 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2023 Oxide Computer Company + +use crate::IpccError; + +/// This stub and it's implementation are used for non-illumos platforms which +/// lack libipcc. +pub struct IpccHandle; + +impl IpccHandle { + pub fn new() -> Result { + panic!("ipcc unavailable on this platform") + } + + pub(crate) fn key_lookup( + &self, + _key: u8, + _buf: &mut [u8], + ) -> Result { + panic!("ipcc unavailable on this platform") + } +} diff --git a/ipcc-key-value/src/lib.rs b/ipcc/src/lib.rs similarity index 75% rename from ipcc-key-value/src/lib.rs rename to ipcc/src/lib.rs index 16e6685018..e997c51230 100644 --- a/ipcc-key-value/src/lib.rs +++ b/ipcc/src/lib.rs @@ -4,26 +4,28 @@ // Copyright 2023 Oxide Computer Company -//! Utilities for key/value pairs passed from the control plane to the SP -//! (through MGS) to the host (through the host/SP uart) via IPCC. +//! An interface to libipcc (inter-processor communications channel) which +//! currently supports looking up values stored in the SP by key. These +//! 
values are variously static, passed from the control plane to the SP +//! (through MGS) or set from userland via libipcc. +use cfg_if::cfg_if; use omicron_common::update::ArtifactHash; use serde::Deserialize; use serde::Serialize; +use thiserror::Error; use uuid::Uuid; -mod ioctl_common; -pub use ioctl_common::*; - -#[cfg(target_os = "illumos")] -mod ioctl; -#[cfg(target_os = "illumos")] -pub use ioctl::Ipcc; - -#[cfg(not(target_os = "illumos"))] -mod ioctl_stub; -#[cfg(not(target_os = "illumos"))] -pub use ioctl_stub::Ipcc; +cfg_if! { + if #[cfg(target_os = "illumos")] { + mod ffi; + mod handle; + use handle::IpccHandle; + } else { + mod handle_stub; + use handle_stub::IpccHandle; + } +} #[cfg(test)] use proptest::arbitrary::any; @@ -38,9 +40,9 @@ use proptest::strategy::Strategy; #[repr(u8)] pub enum Key { /// Always responds `"pong"`. - Ping = 0, + Ping = IpccKey::Ping as u8, /// The value should be an encoded [`InstallinatorImageId`]. - InstallinatorImageId = 1, + InstallinatorImageId = IpccKey::InstallinatorImageId as u8, } /// Description of the images `installinator` needs to fetch from a peer on the @@ -135,10 +137,84 @@ impl InstallinatorImageId { } } -// TODO Add ioctl wrappers? `installinator` is the only client for -// `Key::InstallinatorImageId`, but we might grow other keys for other clients, -// at which point we probably want all the ioctl wrapping to happen in one -// place. +#[derive(Debug, Error)] +pub enum InstallinatorImageIdError { + #[error(transparent)] + Ipcc(#[from] IpccError), + #[error("deserializing installinator image ID failed: {0}")] + DeserializationFailed(String), +} + +#[derive(Error, Debug)] +pub enum IpccError { + #[error("Memory allocation error")] + NoMem(#[source] IpccErrorInner), + #[error("Invalid parameter")] + InvalidParam(#[source] IpccErrorInner), + #[error("Internal error occurred")] + Internal(#[source] IpccErrorInner), + #[error("Requested lookup key was not known to the SP")] + KeyUnknown(#[source] IpccErrorInner), + #[error("Value for the requested lookup key was too large for the supplied buffer")] + KeyBufTooSmall(#[source] IpccErrorInner), + #[error("Attempted to write to read-only key")] + KeyReadonly(#[source] IpccErrorInner), + #[error("Attempted write to key failed because the value is too long")] + KeyValTooLong(#[source] IpccErrorInner), + #[error("Compression or decompression failed")] + KeyZerr(#[source] IpccErrorInner), + #[error("Unknown libipcc error")] + UnknownErr(#[source] IpccErrorInner), +} + +#[derive(Error, Debug)] +#[error("{context}: {errmsg} ({syserr})")] +pub struct IpccErrorInner { + pub context: String, + pub errmsg: String, + pub syserr: String, +} + +/// These are the IPCC keys we can look up. +/// NB: These keys match the definitions found in libipcc (RFD 316) and should +/// match the values in `[ipcc::Key]` one-to-one. +#[derive(Debug, Clone, Copy)] +#[allow(dead_code)] +#[repr(u8)] +enum IpccKey { + Ping = 0, + InstallinatorImageId = 1, + Inventory = 2, + System = 3, + Dtrace = 4, +} + +/// Interface to the inter-processor communications channel. +/// For more information see rfd 316. +pub struct Ipcc { + handle: IpccHandle, +} + +impl Ipcc { + /// Creates a new `Ipcc` instance. + pub fn new() -> Result { + let handle = IpccHandle::new()?; + Ok(Self { handle }) + } + + /// Returns the current `InstallinatorImageId`. 
+ pub fn installinator_image_id( + &self, + ) -> Result { + let mut buf = [0; InstallinatorImageId::CBOR_SERIALIZED_SIZE]; + let n = self + .handle + .key_lookup(IpccKey::InstallinatorImageId as u8, &mut buf)?; + let id = InstallinatorImageId::deserialize(&buf[..n]) + .map_err(InstallinatorImageIdError::DeserializationFailed)?; + Ok(id) + } +} #[cfg(test)] mod tests { diff --git a/openapi/gateway.json b/openapi/gateway.json index 9eacbe122d..5961b670ed 100644 --- a/openapi/gateway.json +++ b/openapi/gateway.json @@ -1129,7 +1129,7 @@ "/sp/{type}/{slot}/ipcc/installinator-image-id": { "put": { "summary": "Set the installinator image ID the sled should use for recovery.", - "description": "This value can be read by the host via IPCC; see the `ipcc-key-value` crate.", + "description": "This value can be read by the host via IPCC; see the `ipcc` crate.", "operationId": "sp_installinator_image_id_set", "parameters": [ { From 28957f1cab39da836043a891d0b62229560413d1 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 05:23:44 +0000 Subject: [PATCH 167/186] Update taiki-e/install-action digest to a6173a9 (#4797) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`2f4c386` -> `a6173a9`](https://togithub.com/taiki-e/install-action/compare/2f4c386...a6173a9) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index f758bd79b9..c940f21fb2 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@2f4c386a81aeab009d470320dfc6e0930ee4e064 # v2 + uses: taiki-e/install-action@a6173a9cbc8927eb1def26c72d123d297efb1b10 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From aa3f26dd35b9346e698d58a86170b13cf8046ccb Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 07:03:52 +0000 Subject: [PATCH 168/186] Update Rust crate vsss-rs to 3.3.4 (#4799) --- Cargo.lock | 4 ++-- bootstore/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7491f30dde..3cf8bfd887 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9527,9 +9527,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "vsss-rs" -version = "3.3.2" +version = "3.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18cf462590a38451396633ef9771f3e9afcf439309137fd6c6eaaec0fb38252" +checksum = "196bbee60607a195bc850e94f0e040bd090e45794ad8df0e9c5a422b9975a00f" dependencies = [ "curve25519-dalek", "elliptic-curve", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml index 1eade5afe8..37280f6dcb 100644 --- a/bootstore/Cargo.toml +++ b/bootstore/Cargo.toml @@ -27,7 +27,7 @@ slog.workspace = true thiserror.workspace = true tokio.workspace = true uuid.workspace = true -vsss-rs = { version = "3.3.2", features = ["std", "curve25519"] } +vsss-rs = { version = "3.3.4", features = ["std", "curve25519"] } zeroize.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
From 295b202f06dde1f05cc49db9381a596c2c742660 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:16:49 +0000 Subject: [PATCH 169/186] Update Swatinem/rust-cache action to v2.7.2 (#4800) --- .github/workflows/rust.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 23ccc7e61f..fff5f3e6c2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -34,7 +34,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 + - uses: Swatinem/rust-cache@a22603398250b864f7190077025cf752307154dc # v2.7.2 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -64,7 +64,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 + - uses: Swatinem/rust-cache@a22603398250b864f7190077025cf752307154dc # v2.7.2 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -94,7 +94,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 + - uses: Swatinem/rust-cache@a22603398250b864f7190077025cf752307154dc # v2.7.2 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version From 1416ee06f367fc1e8aa6c28d65b3b88a7300c79a Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 11 Jan 2024 08:37:51 -0800 Subject: [PATCH 170/186] add --include-deleted switch to omdb db commands (#4648) omdb's database commands are slightly inconsistent about whether they include soft-deleted records in their outputs: most queries don't, but instances do. Add a command-line option for this (defaulting to "don't include") and update subcommands that construct their own queries to honor it. The exceptions are (a) subcommands that call datastore functions directly (these keep their existing behavior) and (b) validation commands that e.g. reason about reference graphs involving deleted and not-deleted objects. Also, clean up a couple of callers of `check_limit` who weren't actually setting a LIMIT clause in their queries. (The check is harmless in this case, but will produce spurious warnings if the queries involved return exactly the right number of rows.) Tested by running `omdb db` commands on the dogfood rack. Fixes #4647. 
--- dev-tools/omdb/src/bin/omdb/db.rs | 179 +++++++++++++++----------- dev-tools/omdb/tests/usage_errors.out | 4 + 2 files changed, 111 insertions(+), 72 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index f58fd57b9d..23e9206506 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -133,6 +133,15 @@ pub struct DbArgs { #[clap(long, env("OMDB_DB_URL"))] db_url: Option, + #[clap(flatten)] + fetch_opts: DbFetchOptions, + + #[command(subcommand)] + command: DbCommands, +} + +#[derive(Debug, Args)] +pub struct DbFetchOptions { /// limit to apply to queries that fetch rows #[clap( long = "fetch-limit", @@ -141,8 +150,10 @@ pub struct DbArgs { )] fetch_limit: NonZeroU32, - #[command(subcommand)] - command: DbCommands, + /// whether to include soft-deleted records when enumerating objects that + /// can be soft-deleted + #[clap(long, default_value_t = false)] + include_deleted: bool, } /// Subcommands that query or update the database @@ -398,30 +409,30 @@ impl DbArgs { command: DiskCommands::Info(uuid), }) => cmd_db_disk_info(&opctx, &datastore, uuid).await, DbCommands::Disks(DiskArgs { command: DiskCommands::List }) => { - cmd_db_disk_list(&datastore, self.fetch_limit).await + cmd_db_disk_list(&datastore, &self.fetch_opts).await } DbCommands::Disks(DiskArgs { command: DiskCommands::Physical(uuid), }) => { - cmd_db_disk_physical(&opctx, &datastore, self.fetch_limit, uuid) + cmd_db_disk_physical(&opctx, &datastore, &self.fetch_opts, uuid) .await } DbCommands::Dns(DnsArgs { command: DnsCommands::Show }) => { - cmd_db_dns_show(&opctx, &datastore, self.fetch_limit).await + cmd_db_dns_show(&opctx, &datastore, &self.fetch_opts).await } DbCommands::Dns(DnsArgs { command: DnsCommands::Diff(args) }) => { - cmd_db_dns_diff(&opctx, &datastore, self.fetch_limit, args) + cmd_db_dns_diff(&opctx, &datastore, &self.fetch_opts, args) .await } DbCommands::Dns(DnsArgs { command: DnsCommands::Names(args) }) => { - cmd_db_dns_names(&opctx, &datastore, self.fetch_limit, args) + cmd_db_dns_names(&opctx, &datastore, &self.fetch_opts, args) .await } DbCommands::Inventory(inventory_args) => { cmd_db_inventory( &opctx, &datastore, - self.fetch_limit, + &self.fetch_opts, inventory_args, ) .await @@ -432,7 +443,7 @@ impl DbArgs { cmd_db_services_list_instances( &opctx, &datastore, - self.fetch_limit, + &self.fetch_opts, ) .await } @@ -442,21 +453,21 @@ impl DbArgs { cmd_db_services_list_by_sled( &opctx, &datastore, - self.fetch_limit, + &self.fetch_opts, ) .await } DbCommands::Sleds => { - cmd_db_sleds(&opctx, &datastore, self.fetch_limit).await + cmd_db_sleds(&opctx, &datastore, &self.fetch_opts).await } DbCommands::Instances => { - cmd_db_instances(&opctx, &datastore, self.fetch_limit).await + cmd_db_instances(&opctx, &datastore, &self.fetch_opts).await } DbCommands::Network(NetworkArgs { command: NetworkCommands::ListEips, verbose, }) => { - cmd_db_eips(&opctx, &datastore, self.fetch_limit, *verbose) + cmd_db_eips(&opctx, &datastore, &self.fetch_opts, *verbose) .await } DbCommands::Snapshots(SnapshotArgs { @@ -464,19 +475,13 @@ impl DbArgs { }) => cmd_db_snapshot_info(&opctx, &datastore, uuid).await, DbCommands::Snapshots(SnapshotArgs { command: SnapshotCommands::List, - }) => cmd_db_snapshot_list(&datastore, self.fetch_limit).await, + }) => cmd_db_snapshot_list(&datastore, &self.fetch_opts).await, DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateVolumeReferences, - }) => { - 
cmd_db_validate_volume_references(&datastore, self.fetch_limit) - .await - } + }) => cmd_db_validate_volume_references(&datastore).await, DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateRegionSnapshots, - }) => { - cmd_db_validate_region_snapshots(&datastore, self.fetch_limit) - .await - } + }) => cmd_db_validate_region_snapshots(&datastore).await, } } } @@ -564,7 +569,7 @@ fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { /// Run `omdb db disk list`. async fn cmd_db_disk_list( datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] @@ -579,15 +584,19 @@ async fn cmd_db_disk_list( let ctx = || "listing disks".to_string(); use db::schema::disk::dsl; - let disks = dsl::disk - .filter(dsl::time_deleted.is_null()) - .limit(i64::from(u32::from(limit))) + let mut query = dsl::disk.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let disks = query + .limit(i64::from(u32::from(fetch_opts.fetch_limit))) .select(Disk::as_select()) .load_async(&*datastore.pool_connection_for_tests().await?) .await .context("loading disks")?; - check_limit(&disks, limit, ctx); + check_limit(&disks, fetch_opts.fetch_limit, ctx); let rows = disks.into_iter().map(|disk| DiskRow { name: disk.name().to_string(), @@ -777,15 +786,19 @@ async fn cmd_db_disk_info( async fn cmd_db_disk_physical( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, args: &DiskPhysicalArgs, ) -> Result<(), anyhow::Error> { let conn = datastore.pool_connection_for_tests().await?; // We start by finding any zpools that are using the physical disk. use db::schema::zpool::dsl as zpool_dsl; - let zpools = zpool_dsl::zpool - .filter(zpool_dsl::time_deleted.is_null()) + let mut query = zpool_dsl::zpool.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(zpool_dsl::time_deleted.is_null()); + } + + let zpools = query .filter(zpool_dsl::physical_disk_id.eq(args.uuid)) .select(Zpool::as_select()) .load_async(&*conn) @@ -799,6 +812,7 @@ async fn cmd_db_disk_physical( println!("Found no zpools on physical disk UUID {}", args.uuid); return Ok(()); } + // The current plan is a single zpool per physical disk, so we expect that // this will have a single item. However, If single zpool per disk ever // changes, this code will still work. @@ -808,8 +822,12 @@ async fn cmd_db_disk_physical( // Next, we find all the datasets that are on our zpool. use db::schema::dataset::dsl as dataset_dsl; - let datasets = dataset_dsl::dataset - .filter(dataset_dsl::time_deleted.is_null()) + let mut query = dataset_dsl::dataset.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dataset_dsl::time_deleted.is_null()); + } + + let datasets = query .filter(dataset_dsl::pool_id.eq(zp.id())) .select(Dataset::as_select()) .load_async(&*conn) @@ -862,16 +880,20 @@ async fn cmd_db_disk_physical( // to find the virtual disks associated with these volume IDs and // display information about those disks. 
use db::schema::disk::dsl; - let disks = dsl::disk - .filter(dsl::time_deleted.is_null()) + let mut query = dsl::disk.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let disks = query .filter(dsl::volume_id.eq_any(volume_ids)) - .limit(i64::from(u32::from(limit))) + .limit(i64::from(u32::from(fetch_opts.fetch_limit))) .select(Disk::as_select()) .load_async(&*conn) .await .context("loading disks")?; - check_limit(&disks, limit, || "listing disks".to_string()); + check_limit(&disks, fetch_opts.fetch_limit, || "listing disks".to_string()); #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] @@ -924,6 +946,7 @@ async fn cmd_db_disk_physical( println!("{}", table); // Collect the region_snapshots associated with the dataset IDs + let limit = fetch_opts.fetch_limit; use db::schema::region_snapshot::dsl as region_snapshot_dsl; let region_snapshots = region_snapshot_dsl::region_snapshot .filter(region_snapshot_dsl::dataset_id.eq_any(dataset_ids)) @@ -972,8 +995,12 @@ async fn cmd_db_disk_physical( // Get the snapshots from the list of IDs we built above. // Display information about those snapshots. use db::schema::snapshot::dsl as snapshot_dsl; - let snapshots = snapshot_dsl::snapshot - .filter(snapshot_dsl::time_deleted.is_null()) + let mut query = snapshot_dsl::snapshot.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(snapshot_dsl::time_deleted.is_null()); + } + + let snapshots = query .filter(snapshot_dsl::id.eq_any(snapshot_ids)) .limit(i64::from(u32::from(limit))) .select(Snapshot::as_select()) @@ -1046,13 +1073,18 @@ impl From for SnapshotRow { /// Run `omdb db snapshot list`. async fn cmd_db_snapshot_list( datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { let ctx = || "listing snapshots".to_string(); + let limit = fetch_opts.fetch_limit; use db::schema::snapshot::dsl; - let snapshots = dsl::snapshot - .filter(dsl::time_deleted.is_null()) + let mut query = dsl::snapshot.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let snapshots = query .limit(i64::from(u32::from(limit))) .select(Snapshot::as_select()) .load_async(&*datastore.pool_connection_for_tests().await?) 
@@ -1158,8 +1190,9 @@ async fn cmd_db_snapshot_info( async fn cmd_db_services_list_instances( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let sled_list = datastore .sled_list(&opctx, &first_page(limit)) .await @@ -1223,8 +1256,9 @@ struct ServiceInstanceSledRow { async fn cmd_db_services_list_by_sled( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let sled_list = datastore .sled_list(&opctx, &first_page(limit)) .await @@ -1299,8 +1333,9 @@ impl From for SledRow { async fn cmd_db_sleds( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let sleds = datastore .sled_list(&opctx, &first_page(limit)) .await @@ -1333,11 +1368,18 @@ struct CustomerInstanceRow { async fn cmd_db_instances( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { use db::schema::instance::dsl; use db::schema::vmm::dsl as vmm_dsl; - let instances: Vec = dsl::instance + + let limit = fetch_opts.fetch_limit; + let mut query = dsl::instance.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let instances: Vec = query .left_join( vmm_dsl::vmm.on(vmm_dsl::id .nullable() @@ -1408,7 +1450,7 @@ async fn cmd_db_instances( async fn cmd_db_dns_show( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] @@ -1421,6 +1463,7 @@ async fn cmd_db_dns_show( reason: String, } + let limit = fetch_opts.fetch_limit; let mut rows = Vec::with_capacity(2); for group in [DnsGroup::Internal, DnsGroup::External] { let ctx = || format!("listing DNS zones for DNS group {:?}", group); @@ -1493,9 +1536,10 @@ async fn load_zones_version( async fn cmd_db_dns_diff( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, args: &DnsVersionArgs, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let (dns_zones, version) = load_zones_version(opctx, datastore, limit, args).await?; @@ -1557,9 +1601,10 @@ async fn cmd_db_dns_diff( async fn cmd_db_dns_names( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, args: &DnsVersionArgs, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let (group_zones, version) = load_zones_version(opctx, datastore, limit, args).await?; @@ -1606,17 +1651,24 @@ async fn cmd_db_dns_names( async fn cmd_db_eips( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, verbose: bool, ) -> Result<(), anyhow::Error> { use db::schema::external_ip::dsl; - let ips: Vec = dsl::external_ip - .filter(dsl::time_deleted.is_null()) + let mut query = dsl::external_ip.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let ips: Vec = query .select(ExternalIp::as_select()) + .limit(i64::from(u32::from(fetch_opts.fetch_limit))) .get_results_async(&*datastore.pool_connection_for_tests().await?) 
.await?; - check_limit(&ips, limit, || String::from("listing external ips")); + check_limit(&ips, fetch_opts.fetch_limit, || { + String::from("listing external ips") + }); struct PortRange { first: u16, @@ -1756,7 +1808,6 @@ async fn cmd_db_eips( /// Validate the `volume_references` column of the region snapshots table async fn cmd_db_validate_volume_references( datastore: &DataStore, - limit: NonZeroU32, ) -> Result<(), anyhow::Error> { // First, get all region snapshot records let region_snapshots: Vec = { @@ -1775,10 +1826,6 @@ async fn cmd_db_validate_volume_references( }) .await?; - check_limit(®ion_snapshots, limit, || { - String::from("listing region snapshots") - }); - region_snapshots }; @@ -1823,10 +1870,6 @@ async fn cmd_db_validate_volume_references( }) .await?; - check_limit(&matching_volumes, limit, || { - String::from("finding matching volumes") - }); - matching_volumes }; @@ -1890,7 +1933,6 @@ async fn cmd_db_validate_volume_references( async fn cmd_db_validate_region_snapshots( datastore: &DataStore, - limit: NonZeroU32, ) -> Result<(), anyhow::Error> { let mut regions_to_snapshots_map: BTreeMap> = BTreeMap::default(); @@ -1922,10 +1964,6 @@ async fn cmd_db_validate_region_snapshots( }) .await?; - check_limit(&datasets_region_snapshots, limit, || { - String::from("listing datasets and region snapshots") - }); - datasets_region_snapshots }; @@ -2097,10 +2135,6 @@ async fn cmd_db_validate_region_snapshots( }) .await?; - check_limit(&datasets_and_regions, limit, || { - String::from("listing datasets and regions") - }); - datasets_and_regions }; @@ -2226,9 +2260,10 @@ fn format_record(record: &DnsRecord) -> impl Display { async fn cmd_db_inventory( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, inventory_args: &InventoryArgs, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let conn = datastore.pool_connection_for_tests().await?; match inventory_args.command { InventoryCommands::BaseboardIds => { diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index eaabf970a6..3c5f099c61 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -105,6 +105,8 @@ Options: --db-url URL of the database SQL interface [env: OMDB_DB_URL=] --fetch-limit limit to apply to queries that fetch rows [env: OMDB_FETCH_LIMIT=] [default: 500] + --include-deleted whether to include soft-deleted records when enumerating objects + that can be soft-deleted -h, --help Print help ============================================= EXECUTING COMMAND: omdb ["db", "--help"] @@ -131,6 +133,8 @@ Options: --db-url URL of the database SQL interface [env: OMDB_DB_URL=] --fetch-limit limit to apply to queries that fetch rows [env: OMDB_FETCH_LIMIT=] [default: 500] + --include-deleted whether to include soft-deleted records when enumerating objects + that can be soft-deleted -h, --help Print help --------------------------------------------- stderr: From 19059b1bedc8bbc7a9d293486842a9a4fd264ea3 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Thu, 11 Jan 2024 10:03:22 -0800 Subject: [PATCH 171/186] Update Crucible and Propolis versions (#4795) Propolis changes since the last update: Gripe when using non-raw block device Update zerocopy dependency nvme: Wire up GetFeatures command Make Viona more robust in the face of errors bump softnpu (#577) Modernize 16550 UART Crucible changes since the last update: Don't check ROP if the scrub is done (#1093) Allow crutest cli to be quiet on 
generic test (#1070) Offload write encryption (#1066) Simplify handling of BlockReq at program exit (#1085) Update Rust crate byte-unit to v5 (#1054) Remove unused fields in match statements, downstairs edition (#1084) Remove unused fields in match statements and consolidate (#1083) Add logger to Guest (#1082) Drive hash / decrypt tests from Upstairs::apply Wait to reconnect if auto_promote is false Change guest work id from u64 -> GuestWorkId remove BlockOp::Commit (#1072) Various clippy fixes (#1071) Don't panic if tasks are destroyed out of order Update Rust crate reedline to 0.27.1 (#1074) Update Rust crate async-trait to 0.1.75 (#1073) Buffer should destructure to Vec when single-referenced Don't fail to make unencrypted regions (#1067) Fix shadowing in downstairs (#1063) Single-task refactoring (#1058) Update Rust crate tokio to 1.35 (#1052) Update Rust crate openapiv3 to 2.0.0 (#1050) Update Rust crate libc to 0.2.151 (#1049) Update Rust crate rusqlite to 0.30 (#1035) --------- Co-authored-by: Alan Hanson --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 12 ++++++------ package-manifest.toml | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3cf8bfd887..5d378531ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "bhyve_api_sys", "libc", @@ -471,7 +471,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "libc", "strum", @@ -1312,7 +1312,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" dependencies = [ "anyhow", "chrono", @@ -1328,7 +1328,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" dependencies = [ "anyhow", "chrono", @@ -1345,7 +1345,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" dependencies = [ "crucible-workspace-hack", "libc", @@ -6283,7 +6283,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "async-trait", "base64", @@ -6304,7 +6304,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "anyhow", "atty", @@ -6334,7 +6334,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index fbef04d3c0..5cd13cc04b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -172,9 +172,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } curve25519-dalek = "4" datatest-stable = "0.2.3" display-error-chain = "0.2.0" @@ -295,9 +295,9 @@ prettyplease = "0.2.16" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index 6bd40c320d..406b53c97e 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -396,10 +396,10 @@ only_for_targets.image = "standard" # 3. 
Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "850b468c308cf63ef9e10addee36a923a91b7ab64af0fa0836130c830fb42863" +source.sha256 = "030a02551e487f561bcfad47426b953d15c4430d77770765c7fc03afd8d61bd9" output.type = "zone" [package.crucible-pantry] @@ -407,10 +407,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "893f845caa5d9b146137b503e80d5615cbd6e9d393745e81e772b10a9072b58b" +source.sha256 = "c74e23e7f7995ba3a69a9ec3a31f1db517ec15cd3a9942c2c07621b219b743b2" output.type = "zone" # Refer to @@ -421,10 +421,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "f1571ce141421cff3d3328f43e7722f5df96fdda" +source.commit = "1e25649e8c2ac274bd04adfe0513dd14a482058c" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "6e2607f103419a6338936434f3e67afb7cbe14d6397f2d01982ba94b8d0182a9" +source.sha256 = "09c124315da3e434c85fe1ddb16459c36d8302e15705ff18fe6bbc7b4876f5f9" output.type = "zone" [package.mg-ddm-gz] From 3e2db6915df2c03284d15cc6da6790b813d6ed19 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 11 Jan 2024 13:36:44 -0600 Subject: [PATCH 172/186] Bump web console (page size 10 -> 25) (#4803) https://github.com/oxidecomputer/console/compare/bcc80258...367142c5 * [367142c5](https://github.com/oxidecomputer/console/commit/367142c5) oxidecomputer/console#1878 * [c588a63e](https://github.com/oxidecomputer/console/commit/c588a63e) oxidecomputer/console#1874 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index b12bcdbc9f..f9347b6dbf 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="bcc80258f7ddd99f6cc4c94f8cc62c012d08acad" -SHA2="5d219bd7b2e5bd6a23985988be4f557bc79880fb307b1a55c1eed4b2927a8fd5" +COMMIT="367142c5ed711e6dcfc59586277775020625bd6a" +SHA2="7e061165950fc064811cc5f26d7b7bd102c0df63797ef05cf73d737c2fdceb87" From 86c64a2fe8167de354a827425b2d48cfe0f33f33 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 13:24:55 -0800 Subject: [PATCH 173/186] Update Rust crate cargo_toml to 0.18 (#4801) --- Cargo.lock | 4 ++-- dev-tools/xtask/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d378531ba..ef7aefb9f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,9 +794,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ca592ad99e6a0fd4b95153406138b997cc26ccd3cd0aecdfd4fbdbf1519bd77" +checksum = 
"802b755090e39835a4b0440fb0bbee0df7495a8b337f63db21e616f7821c7e8c" dependencies = [ "serde", "toml 0.8.8", diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index d054d85646..bccb69a1f7 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -7,6 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true camino.workspace = true -cargo_toml = "0.17" +cargo_toml = "0.18" cargo_metadata = "0.18" clap.workspace = true From 5f62bb8fb8dd29d25ee59118cf6980e3ff32d1e5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 12 Jan 2024 12:39:41 -0800 Subject: [PATCH 174/186] Update Rust crate assert_cmd to 2.0.13 (#4807) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ef7aefb9f2..d4616fd801 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -236,9 +236,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.12" +version = "2.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" +checksum = "00ad3f3a942eee60335ab4342358c161ee296829e0d16ff42fc1d6cb07815467" dependencies = [ "anstyle", "bstr 1.6.0", diff --git a/Cargo.toml b/Cargo.toml index 5cd13cc04b..ca22b9d938 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -145,7 +145,7 @@ anyhow = "1.0" api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" -assert_cmd = "2.0.12" +assert_cmd = "2.0.13" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } async-trait = "0.1.77" atomicwrites = "0.4.3" From 2ebbb7a1a9b0804e22de1fc949cf37f8e73a8580 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 12 Jan 2024 13:37:10 -0800 Subject: [PATCH 175/186] [clickhouse] upload logs on test failure (#4796) Part of trying to figure out #4779 -- we now create ClickHouse logs in a well-named directory which gets not-cleaned up (and therefore uploaded) at the end. 
Tested this by introducing a failure in a test, and then seeing the following files in `/tmp`: ``` % find test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08 test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08 test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08/log test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08/clickhouse-server.log test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08/clickhouse-server.errlog ``` --- .../buildomat/jobs/build-and-test-helios.sh | 2 +- .../buildomat/jobs/build-and-test-linux.sh | 2 +- Cargo.lock | 110 ++++++++++------ dev-tools/omicron-dev/src/bin/omicron-dev.rs | 32 +++-- nexus/benches/setup_benchmark.rs | 6 +- nexus/test-utils/src/lib.rs | 10 +- oximeter/db/src/client.rs | 30 +++-- test-utils/Cargo.toml | 1 + test-utils/src/dev/clickhouse.rs | 119 +++++++++++++++--- workspace-hack/Cargo.toml | 4 +- 10 files changed, 232 insertions(+), 84 deletions(-) diff --git a/.github/buildomat/jobs/build-and-test-helios.sh b/.github/buildomat/jobs/build-and-test-helios.sh index f9722a2b92..2c7a1f884d 100755 --- a/.github/buildomat/jobs/build-and-test-helios.sh +++ b/.github/buildomat/jobs/build-and-test-helios.sh @@ -5,7 +5,7 @@ #: target = "helios-2.0" #: rust_toolchain = "1.72.1" #: output_rules = [ -#: "/var/tmp/omicron_tmp/*", +#: "%/var/tmp/omicron_tmp/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] diff --git a/.github/buildomat/jobs/build-and-test-linux.sh b/.github/buildomat/jobs/build-and-test-linux.sh index 715effd080..4f4ebc1d8a 100755 --- a/.github/buildomat/jobs/build-and-test-linux.sh +++ b/.github/buildomat/jobs/build-and-test-linux.sh @@ -5,7 +5,7 @@ #: target = "ubuntu-22.04" #: rust_toolchain = "1.72.1" #: output_rules = [ -#: "/var/tmp/omicron_tmp/*", +#: "%/var/tmp/omicron_tmp/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] diff --git a/Cargo.lock b/Cargo.lock index d4616fd801..63b36090e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1836,7 +1836,7 @@ dependencies = [ "dns-service-client", "dropshot", "expectorate", - "http", + "http 0.2.11", "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", @@ -1868,7 +1868,7 @@ name = "dns-service-client" version = "0.1.0" dependencies = [ "chrono", - "http", + "http 0.2.11", "omicron-workspace-hack", "progenitor", "reqwest", @@ -1906,7 +1906,7 @@ dependencies = [ "anyhow", "chrono", "futures", - "http", + "http 0.2.11", "ipnetwork", "omicron-workspace-hack", "omicron-zone-package", @@ -1928,7 +1928,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ff87a0175a6c8ce4462cfe7486edd7000f01be6e" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#b19a9a5d049f4433547f9f3b11d10a9483fc6acf" dependencies = [ "async-stream", "async-trait", @@ -1941,7 +1941,7 @@ dependencies = [ "form_urlencoded", "futures", "hostname", - "http", + "http 0.2.11", "hyper", "indexmap 2.1.0", "multer", @@ -1950,7 +1950,7 @@ dependencies = [ "percent-encoding", "proc-macro2", "rustls", - "rustls-pemfile", + "rustls-pemfile 2.0.0", "schemars", "serde", "serde_json", @@ -1974,7 +1974,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ff87a0175a6c8ce4462cfe7486edd7000f01be6e" +source = 
"git+https://github.com/oxidecomputer/dropshot?branch=main#b19a9a5d049f4433547f9f3b11d10a9483fc6acf" dependencies = [ "proc-macro2", "quote", @@ -2112,7 +2112,7 @@ dependencies = [ "async-trait", "base64", "chrono", - "http", + "http 0.2.11", "hyper", "omicron-sled-agent", "omicron-test-utils", @@ -2749,7 +2749,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.11", "indexmap 1.9.3", "slab", "tokio", @@ -2802,7 +2802,7 @@ dependencies = [ "base64", "bytes", "headers-core", - "http", + "http 0.2.11", "httpdate", "mime", "sha1", @@ -2814,7 +2814,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" dependencies = [ - "http", + "http 0.2.11", ] [[package]] @@ -2930,6 +2930,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b32afd38673a8016f7c9ae69e5af41a58f81b1d31689040f2f1959594ce194ea" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.5" @@ -2937,7 +2948,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", - "http", + "http 0.2.11", "pin-project-lite", ] @@ -2970,7 +2981,7 @@ dependencies = [ "crossbeam-channel", "form_urlencoded", "futures", - "http", + "http 0.2.11", "hyper", "log", "once_cell", @@ -3057,7 +3068,7 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", + "http 0.2.11", "http-body", "httparse", "httpdate", @@ -3077,7 +3088,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", - "http", + "http 0.2.11", "hyper", "log", "rustls", @@ -3093,7 +3104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "318ca89e4827e7fe4ddd2824f52337239796ae8ecc761a663324407dc3d8d7e7" dependencies = [ "futures-util", - "http", + "http 0.2.11", "http-range", "httpdate", "hyper", @@ -3302,7 +3313,7 @@ dependencies = [ "futures", "hex", "hex-literal", - "http", + "http 0.2.11", "illumos-utils", "installinator-artifact-client", "installinator-common", @@ -4000,14 +4011,14 @@ dependencies = [ [[package]] name = "multer" -version = "2.1.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +checksum = "a15d522be0a9c3e46fd2632e272d178f56387bdb5c9fbb3a36c649062e9b5219" dependencies = [ "bytes", "encoding_rs", "futures-util", - "http", + "http 1.0.0", "httparse", "log", "memchr", @@ -4151,7 +4162,7 @@ dependencies = [ "futures", "gateway-client", "headers", - "http", + "http 0.2.11", "hyper", "hyper-rustls", "internal-dns", @@ -4270,7 +4281,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "headers", - "http", + "http 0.2.11", "hyper", "internal-dns", "nexus-db-queries", @@ -4619,7 +4630,7 @@ dependencies = [ "expectorate", "futures", "hex", - "http", + "http 0.2.11", "ipnetwork", "libc", "macaddr", @@ -4710,7 +4721,7 @@ dependencies = [ "gateway-sp-comms", "gateway-test-utils", "hex", - "http", + "http 0.2.11", "hyper", "illumos-utils", "ipcc", @@ -4768,7 +4779,7 @@ dependencies = [ "gateway-test-utils", "headers", "hex", - "http", + "http 0.2.11", "httptest", "hubtools", "hyper", @@ -4980,7 
+4991,7 @@ dependencies = [ "glob", "guppy", "hex", - "http", + "http 0.2.11", "hyper", "hyper-staticfile", "illumos-utils", @@ -5052,7 +5063,7 @@ dependencies = [ "filetime", "headers", "hex", - "http", + "http 0.2.11", "libc", "omicron-common", "omicron-workspace-hack", @@ -5070,6 +5081,7 @@ dependencies = [ "tokio", "tokio-postgres", "usdt", + "walkdir", ] [[package]] @@ -5385,7 +5397,7 @@ dependencies = [ "base64", "chrono", "futures", - "http", + "http 0.2.11", "hyper", "omicron-workspace-hack", "progenitor", @@ -5540,7 +5552,7 @@ dependencies = [ "chrono", "dropshot", "futures", - "http", + "http 0.2.11", "kstat-rs", "omicron-workspace-hack", "oximeter", @@ -6248,7 +6260,7 @@ source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628 dependencies = [ "getopts", "heck 0.4.1", - "http", + "http 0.2.11", "indexmap 2.1.0", "openapiv3", "proc-macro2", @@ -6746,7 +6758,7 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", + "http 0.2.11", "http-body", "hyper", "hyper-rustls", @@ -6760,7 +6772,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", - "rustls-pemfile", + "rustls-pemfile 1.0.3", "serde", "serde_json", "serde_urlencoded", @@ -7098,7 +7110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 1.0.3", "schannel", "security-framework", ] @@ -7112,6 +7124,22 @@ dependencies = [ "base64", ] +[[package]] +name = "rustls-pemfile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" +dependencies = [ + "base64", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -7375,9 +7403,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.194" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" dependencies = [ "serde_derive", ] @@ -7413,9 +7441,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.194" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", @@ -7813,9 +7841,9 @@ dependencies = [ [[package]] name = "slog-bunyan" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "440fd32d0423c31e4f98d76c0b62ebdb847f905aa07357197e9b41ac620af97d" +checksum = "dcaaf6e68789d3f0411f1e72bc443214ef252a1038b6e344836e50442541f190" dependencies = [ "hostname", "slog", @@ -9185,7 +9213,7 @@ dependencies = [ "byteorder", "bytes", "data-encoding", - "http", + "http 0.2.11", "httparse", "log", "rand 0.8.5", @@ -9840,7 +9868,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "hex", - "http", + "http 0.2.11", "hubtools", "hyper", "illumos-utils", diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs 
b/dev-tools/omicron-dev/src/bin/omicron-dev.rs index bbff4f6fe5..0eb421478c 100644 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -9,6 +9,7 @@ use camino::Utf8Path; use camino::Utf8PathBuf; use clap::Args; use clap::Parser; +use dropshot::test_util::LogContext; use futures::stream::StreamExt; use nexus_test_interface::NexusServer; use omicron_common::cmd::fatal; @@ -270,22 +271,32 @@ struct ChRunArgs { } async fn cmd_clickhouse_run(args: &ChRunArgs) -> Result<(), anyhow::Error> { + let logctx = LogContext::new( + "omicron-dev", + &dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + ); if args.replicated { - start_replicated_cluster().await?; + start_replicated_cluster(&logctx).await?; } else { - start_single_node(args.port).await?; + start_single_node(&logctx, args.port).await?; } Ok(()) } -async fn start_single_node(port: u16) -> Result<(), anyhow::Error> { +async fn start_single_node( + logctx: &LogContext, + port: u16, +) -> Result<(), anyhow::Error> { // Start a stream listening for SIGINT let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); let mut signal_stream = signals.fuse(); // Start the database server process, possibly on a specific port let mut db_instance = - dev::clickhouse::ClickHouseInstance::new_single_node(port).await?; + dev::clickhouse::ClickHouseInstance::new_single_node(logctx, port) + .await?; println!( "omicron-dev: running ClickHouse with full command:\n\"clickhouse {}\"", db_instance.cmdline().join(" ") @@ -331,7 +342,9 @@ async fn start_single_node(port: u16) -> Result<(), anyhow::Error> { Ok(()) } -async fn start_replicated_cluster() -> Result<(), anyhow::Error> { +async fn start_replicated_cluster( + logctx: &LogContext, +) -> Result<(), anyhow::Error> { // Start a stream listening for SIGINT let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); let mut signal_stream = signals.fuse(); @@ -345,9 +358,12 @@ async fn start_replicated_cluster() -> Result<(), anyhow::Error> { .as_path() .join("../../oximeter/db/src/configs/keeper_config.xml"); - let mut cluster = - dev::clickhouse::ClickHouseCluster::new(replica_config, keeper_config) - .await?; + let mut cluster = dev::clickhouse::ClickHouseCluster::new( + logctx, + replica_config, + keeper_config, + ) + .await?; println!( "omicron-dev: running ClickHouse cluster with configuration files:\n \ replicas: {}\n keepers: {}", diff --git a/nexus/benches/setup_benchmark.rs b/nexus/benches/setup_benchmark.rs index 304ccc8325..5e87151512 100644 --- a/nexus/benches/setup_benchmark.rs +++ b/nexus/benches/setup_benchmark.rs @@ -28,8 +28,12 @@ async fn do_crdb_setup() { // Wraps exclusively the ClickhouseDB portion of setup/teardown. 
async fn do_clickhouse_setup() { + let cfg = nexus_test_utils::load_test_config(); + let logctx = LogContext::new("clickhouse_setup", &cfg.pkg.log); let mut clickhouse = - dev::clickhouse::ClickHouseInstance::new_single_node(0).await.unwrap(); + dev::clickhouse::ClickHouseInstance::new_single_node(&logctx, 0) + .await + .unwrap(); clickhouse.cleanup().await.unwrap(); } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index c6dc9fefe9..19d5f747d8 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -387,10 +387,12 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { pub async fn start_clickhouse(&mut self) { let log = &self.logctx.log; debug!(log, "Starting Clickhouse"); - let clickhouse = - dev::clickhouse::ClickHouseInstance::new_single_node(0) - .await - .unwrap(); + let clickhouse = dev::clickhouse::ClickHouseInstance::new_single_node( + &self.logctx, + 0, + ) + .await + .unwrap(); let port = clickhouse.port(); let zpool_id = Uuid::new_v4(); diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index d6ec01d9fc..fc46a2c498 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -1399,6 +1399,7 @@ mod tests { use crate::query::field_table_name; use bytes::Bytes; use chrono::Utc; + use dropshot::test_util::LogContext; use omicron_test_utils::dev::clickhouse::{ ClickHouseCluster, ClickHouseInstance, }; @@ -1463,8 +1464,9 @@ mod tests { #[tokio::test] async fn test_single_node() { + let logctx = test_setup_log("test_single_node"); // Let the OS assign a port and discover it after ClickHouse starts - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); @@ -1635,22 +1637,24 @@ mod tests { .unwrap(); db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); } - async fn create_cluster() -> ClickHouseCluster { + async fn create_cluster(logctx: &LogContext) -> ClickHouseCluster { let cur_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let replica_config = cur_dir.as_path().join("src/configs/replica_config.xml"); let keeper_config = cur_dir.as_path().join("src/configs/keeper_config.xml"); - ClickHouseCluster::new(replica_config, keeper_config) + ClickHouseCluster::new(logctx, replica_config, keeper_config) .await .expect("Failed to initialise ClickHouse Cluster") } #[tokio::test] async fn test_replicated() { - let mut cluster = create_cluster().await; + let logctx = test_setup_log("test_replicated"); + let mut cluster = create_cluster(&logctx).await; // Tests that the expected error is returned on a wrong address bad_db_connection_test().await.unwrap(); @@ -1884,6 +1888,8 @@ mod tests { .cleanup() .await .expect("Failed to cleanup ClickHouse server 2"); + + logctx.cleanup_successful(); } async fn bad_db_connection_test() -> Result<(), Error> { @@ -4099,7 +4105,7 @@ mod tests { const TEST_NAME: &str = "test_apply_one_schema_upgrade_replicated"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut cluster = create_cluster().await; + let mut cluster = create_cluster(&logctx).await; let address = cluster.replica_1.address; test_apply_one_schema_upgrade_impl(log, address, true).await; @@ -4138,7 +4144,7 @@ mod tests { const TEST_NAME: &str = "test_apply_one_schema_upgrade_single_node"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = 
ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4152,7 +4158,7 @@ mod tests { let logctx = test_setup_log("test_ensure_schema_with_version_gaps_fails"); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4195,7 +4201,7 @@ mod tests { "test_ensure_schema_with_missing_desired_schema_version_fails", ); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4327,7 +4333,7 @@ mod tests { "test_ensure_schema_walks_through_multiple_steps_single_node"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4345,7 +4351,7 @@ mod tests { "test_ensure_schema_walks_through_multiple_steps_replicated"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut cluster = create_cluster().await; + let mut cluster = create_cluster(&logctx).await; let address = cluster.replica_1.address; test_ensure_schema_walks_through_multiple_steps_impl( log, address, true, @@ -4448,7 +4454,7 @@ mod tests { let logctx = test_setup_log("test_select_all_field_types"); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4479,7 +4485,7 @@ mod tests { async fn test_sql_query_output() { let logctx = test_setup_log("test_sql_query_output"); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 7f210134a2..48223e0291 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -30,6 +30,7 @@ usdt.workspace = true rcgen.workspace = true regex.workspace = true reqwest.workspace = true +walkdir.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index c73161eec7..220662d9bb 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -4,13 +4,15 @@ //! Tools for managing ClickHouse during development +use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::process::Stdio; use std::time::Duration; use anyhow::{anyhow, Context}; use camino::{Utf8Path, Utf8PathBuf}; -use camino_tempfile::Utf8TempDir; +use camino_tempfile::{Builder, Utf8TempDir}; +use dropshot::test_util::{log_prefix_for_test, LogContext}; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use thiserror::Error; use tokio::{ @@ -63,8 +65,11 @@ pub enum ClickHouseError { impl ClickHouseInstance { /// Start a new single node ClickHouse server on the given IPv6 port. 
- pub async fn new_single_node(port: u16) -> Result { - let data_dir = ClickHouseDataDir::new()?; + pub async fn new_single_node( + logctx: &LogContext, + port: u16, + ) -> Result { + let data_dir = ClickHouseDataDir::new(logctx)?; let args = vec![ "server".to_string(), "--log-file".to_string(), @@ -115,6 +120,7 @@ impl ClickHouseInstance { /// Start a new replicated ClickHouse server on the given IPv6 port. pub async fn new_replicated( + logctx: &LogContext, port: u16, tcp_port: u16, interserver_port: u16, @@ -122,7 +128,7 @@ impl ClickHouseInstance { r_number: String, config_path: PathBuf, ) -> Result { - let data_dir = ClickHouseDataDir::new()?; + let data_dir = ClickHouseDataDir::new(logctx)?; let args = vec![ "server".to_string(), "--config-file".to_string(), @@ -181,6 +187,7 @@ impl ClickHouseInstance { /// Start a new ClickHouse keeper on the given IPv6 port. pub async fn new_keeper( + logctx: &LogContext, port: u16, k_id: u16, config_path: PathBuf, @@ -191,7 +198,7 @@ impl ClickHouseInstance { if ![1, 2, 3].contains(&k_id) { return Err(ClickHouseError::InvalidKeeperId.into()); } - let data_dir = ClickHouseDataDir::new()?; + let data_dir = ClickHouseDataDir::new(logctx)?; let args = vec![ "keeper".to_string(), @@ -262,7 +269,7 @@ impl ClickHouseInstance { child.wait().await.context("waiting for child")?; } if let Some(dir) = self.data_dir.take() { - dir.close()?; + dir.close_clean()?; } Ok(()) } @@ -294,10 +301,12 @@ struct ClickHouseDataDir { } impl ClickHouseDataDir { - fn new() -> Result { - // Keepers do not allow a dot in the beginning of the directory, so we must - // use a prefix. - let dir = Utf8TempDir::with_prefix("clickhouse-") + fn new(logctx: &LogContext) -> Result { + let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name()); + + let dir = Builder::new() + .prefix(&format!("{prefix}-clickhouse-")) + .tempdir_in(parent_dir) .context("failed to create tempdir for ClickHouse data")?; let ret = Self { dir }; @@ -375,9 +384,83 @@ impl ClickHouseDataDir { self.dir.path().join("snapshots/") } - fn close(self) -> Result<(), anyhow::Error> { + fn close_clean(self) -> Result<(), anyhow::Error> { self.dir.close().context("failed to delete ClickHouse data dir") } + + /// Closes this data directory during a test failure, or other unclean + /// shutdown. + /// + /// Removes all files except those in any of the log directories. + fn close_unclean(self) -> Result<(), anyhow::Error> { + let keep_prefixes = vec![ + self.log_path(), + self.err_log_path(), + self.keeper_log_path(), + self.keeper_err_log_path(), + self.keeper_log_storage_path(), + ]; + // Persist this temporary directory since we're going to be doing the + // cleanup ourselves. + let dir = self.dir.into_path(); + + let mut error_paths = BTreeMap::new(); + // contents_first = true ensures that we delete inner files before + // outer directories. + for entry in walkdir::WalkDir::new(&dir).contents_first(true) { + match entry { + Ok(entry) => { + // If it matches any of the prefixes, skip it. + if keep_prefixes + .iter() + .any(|prefix| entry.path().starts_with(prefix)) + { + continue; + } + if entry.file_type().is_dir() { + if let Err(error) = std::fs::remove_dir(entry.path()) { + // Ignore ENOTEMPTY errors because they're likely + // generated from parents of files we've kept, or + // were unable to delete for other reasons. 
+ if error.raw_os_error() != Some(libc::ENOTEMPTY) { + error_paths.insert( + entry.path().to_owned(), + anyhow!(error), + ); + } + } + } else { + if let Err(error) = std::fs::remove_file(entry.path()) { + error_paths.insert( + entry.path().to_owned(), + anyhow!(error), + ); + } + } + } + Err(error) => { + if let Some(path) = error.path() { + error_paths.insert(path.to_owned(), anyhow!(error)); + } + } + } + } + + // Are there any error paths? + if !error_paths.is_empty() { + let error_paths = error_paths + .into_iter() + .map(|(path, error)| format!("- {}: {}", path.display(), error)) + .collect::>(); + let error_paths = error_paths.join("\n"); + return Err(anyhow!( + "failed to clean up ClickHouse data dir:\n{}", + error_paths + )); + } + + Ok(()) + } } impl Drop for ClickHouseInstance { @@ -392,7 +475,9 @@ impl Drop for ClickHouseInstance { let _ = child.start_kill(); } if let Some(dir) = self.data_dir.take() { - let _ = dir.close(); + if let Err(e) = dir.close_unclean() { + eprintln!("{}", e); + } } } } @@ -412,18 +497,20 @@ pub struct ClickHouseCluster { impl ClickHouseCluster { pub async fn new( + logctx: &LogContext, replica_config: PathBuf, keeper_config: PathBuf, ) -> Result { // Start all Keeper coordinator nodes let keeper_amount = 3; let mut keepers = - Self::new_keeper_set(keeper_amount, &keeper_config).await?; + Self::new_keeper_set(logctx, keeper_amount, &keeper_config).await?; // Start all replica nodes let replica_amount = 2; let mut replicas = - Self::new_replica_set(replica_amount, &replica_config).await?; + Self::new_replica_set(logctx, replica_amount, &replica_config) + .await?; let r1 = replicas.swap_remove(0); let r2 = replicas.swap_remove(0); @@ -443,6 +530,7 @@ impl ClickHouseCluster { } pub async fn new_keeper_set( + logctx: &LogContext, keeper_amount: u16, config_path: &PathBuf, ) -> Result, anyhow::Error> { @@ -453,6 +541,7 @@ impl ClickHouseCluster { let k_id = i; let k = ClickHouseInstance::new_keeper( + logctx, k_port, k_id, config_path.clone(), @@ -468,6 +557,7 @@ impl ClickHouseCluster { } pub async fn new_replica_set( + logctx: &LogContext, replica_amount: u16, config_path: &PathBuf, ) -> Result, anyhow::Error> { @@ -480,6 +570,7 @@ impl ClickHouseCluster { let r_name = format!("oximeter_cluster node {}", i); let r_number = format!("0{}", i); let r = ClickHouseInstance::new_replicated( + logctx, r_port, r_tcp_port, r_interserver_port, diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 5688e133c0..8646e08c27 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -87,7 +87,7 @@ reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", " ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } -serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.195", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } @@ -190,7 +190,7 @@ reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", " ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } -serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } +serde = 
{ version = "1.0.195", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } From 095dbbde156931822a1394fa8248d76825bffaf4 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:22:54 -0800 Subject: [PATCH 176/186] Update Rust crate base64 to 0.21.7 (#4812) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63b36090e0..75c77f869f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,9 +405,9 @@ checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" [[package]] name = "base64" -version = "0.21.6" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c79fed4cdb43e993fcdadc7e58a09fd0e3e649c4436fa11da71c9f1f3ee7feb9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64ct" diff --git a/Cargo.toml b/Cargo.toml index ca22b9d938..238b9e36bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,7 +151,7 @@ async-trait = "0.1.77" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } -base64 = "0.21.6" +base64 = "0.21.7" bb8 = "0.8.1" bcs = "0.1.6" bincode = "1.3.3" From 35838d1df8b626135857815db621cb89acb22bd2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:28:51 -0800 Subject: [PATCH 177/186] Update Rust crate console to 0.15.8 (#4798) --- Cargo.lock | 72 ++------------------------------------- tufaceous/Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 +-- 3 files changed, 6 insertions(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 75c77f869f..4b7360ae8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1037,15 +1037,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] @@ -9982,15 +9982,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -10009,21 +10000,6 @@ dependencies = [ "windows-targets 0.52.0", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.48.5" @@ -10054,12 +10030,6 @@ dependencies = [ "windows_x86_64_msvc 0.52.0", ] -[[package]] -name 
= "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -10072,12 +10042,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -10090,12 +10054,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -10108,12 +10066,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -10126,12 +10078,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -10144,12 +10090,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -10162,12 +10102,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/tufaceous/Cargo.toml b/tufaceous/Cargo.toml index 81248af57d..b911c85b81 100644 --- a/tufaceous/Cargo.toml +++ b/tufaceous/Cargo.toml @@ -10,7 +10,7 @@ anyhow = { workspace = true, features = ["backtrace"] } camino.workspace = true clap = { workspace = true, features = ["derive", "env"] } chrono.workspace = true -console = { version = "0.15.7", default-features = false } +console = { version = "0.15.8", default-features = false } humantime.workspace = true omicron-common.workspace = true slog.workspace = true diff --git 
a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 8646e08c27..4eda5c1af4 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -29,7 +29,7 @@ chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] } clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] } -console = { version = "0.15.7" } +console = { version = "0.15.8" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } @@ -132,7 +132,7 @@ chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] } clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] } -console = { version = "0.15.7" } +console = { version = "0.15.8" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } From a00fe948a75395eaf3cc40ed8d8aed713bbd5a0a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 09:34:41 +0000 Subject: [PATCH 178/186] Update taiki-e/install-action digest to 681c09d (#4813) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`a6173a9` -> `681c09d`](https://togithub.com/taiki-e/install-action/compare/a6173a9...681c09d) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index c940f21fb2..67fefd1430 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@a6173a9cbc8927eb1def26c72d123d297efb1b10 # v2 + uses: taiki-e/install-action@681c09da0e1063a389bc0f4cfa913bfdfdaf0a4d # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 3b4b935a6e2ae28a6510df93004555cfff15a03e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Tue, 16 Jan 2024 12:18:31 +1300 Subject: [PATCH 179/186] Zone network configuration service (#4677) As part of the work for [self assembling zones](https://github.com/oxidecomputer/omicron/issues/1898), it was [suggested](https://github.com/oxidecomputer/omicron/pull/4534#issuecomment-1821457494) to break the network configuration out into a separate service. ## Implementation This PR introduces a new SMF service `oxide/zone-network-setup`, which sets up the common initial zone networking configuration for each self assembled zone. Each of the "self assembled zone" services will now depend on this new service to run, and all properties relating to zone network configuration have been removed from these services. The executable that does the actual zone networking setup is built as a tiny CLI. It takes advantage of clap's parsing validation to make sure all of the properties are present and in the expected format; a rough sketch of the CLI shape appears below. ## Caveats There are two remaining self assembled zones that don't depend on this new service yet (crucible and crucible-pantry). As these two zones need coordinated PRs with the crucible repo, I'd like to implement these in a follow-up PR once this one is approved and merged.
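The sketch below only illustrates the idea described above: a hypothetical outline, not the actual binary (which lives in `zone-network-setup/src/bin/zone-networking.rs`); the flag and field names here are invented for illustration.

```rust
use anyhow::Result;
use clap::Parser;
use std::net::Ipv6Addr;

/// Hypothetical sketch: clap derives the parsing and validation, so a
/// missing or malformed SMF property fails the service before any
/// networking commands run.
#[derive(Parser)]
#[command(name = "zone-networking")]
struct Cli {
    /// Data link on which to create the IP interface (illustrative name).
    #[arg(short, long)]
    datalink: String,

    /// Static IPv6 address for the zone; clap rejects anything that does
    /// not parse as an `Ipv6Addr`.
    #[arg(short, long)]
    static_addr: Ipv6Addr,

    /// Gateway for the default route (illustrative name).
    #[arg(short, long)]
    gateway: Ipv6Addr,
}

fn main() -> Result<()> {
    let cli = Cli::parse();
    // With validated inputs in hand, the real binary would call into the
    // illumos-utils wrappers added in this PR (e.g. Ipadm and Route).
    println!("configuring {} with {}", cli.datalink, cli.static_addr);
    let _ = cli.gateway;
    Ok(())
}
```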
---
 Cargo.lock                                    |  14 +++
 Cargo.toml                                    |   4 +-
 illumos-utils/src/ipadm.rs                    | 110 ++++++++++++++
 illumos-utils/src/lib.rs                      |   4 +-
 illumos-utils/src/route.rs                    |  59 ++++++++
 illumos-utils/src/zone.rs                     |   1 +
 package-manifest.toml                         |  18 ++-
 sled-agent/src/services.rs                    |  66 ++++++---
 smf/clickhouse/manifest.xml                   |   7 +-
 smf/clickhouse/method_script.sh               |  18 ---
 smf/clickhouse_keeper/manifest.xml            |   7 +-
 smf/clickhouse_keeper/method_script.sh        |  18 ---
 smf/cockroachdb/manifest.xml                  |   7 +-
 smf/cockroachdb/method_script.sh              |  18 ---
 smf/zone-network-setup/manifest.xml           |  46 ++++++++
 workspace-hack/Cargo.toml                     |   8 +-
 zone-network-setup/Cargo.toml                 |  15 +++
 zone-network-setup/src/bin/zone-networking.rs |  92 +++++++++++++++
 18 files changed, 424 insertions(+), 88 deletions(-)
 create mode 100644 illumos-utils/src/ipadm.rs
 create mode 100644 illumos-utils/src/route.rs
 create mode 100644 smf/zone-network-setup/manifest.xml
 create mode 100644 zone-network-setup/Cargo.toml
 create mode 100644 zone-network-setup/src/bin/zone-networking.rs

diff --git a/Cargo.lock b/Cargo.lock
index 4b7360ae8b..7db604dcb5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10298,6 +10298,20 @@ dependencies = [
  "zone_cfg_derive",
 ]

+[[package]]
+name = "zone-network-setup"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "clap 4.4.3",
+ "dropshot",
+ "illumos-utils",
+ "omicron-common",
+ "omicron-workspace-hack",
+ "slog",
+ "tokio",
+]
+
 [[package]]
 name = "zone_cfg_derive"
 version = "0.3.0"

diff --git a/Cargo.toml b/Cargo.toml
index 238b9e36bf..515e767bfb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -69,6 +69,7 @@ members = [
  "wicket",
  "wicketd",
  "workspace-hack",
+ "zone-network-setup",
 ]

 default-members = [
@@ -137,6 +138,7 @@ default-members = [
  "wicket-dbg",
  "wicket",
  "wicketd",
+ "zone-network-setup",
 ]
 resolver = "2"

@@ -167,7 +169,7 @@ chacha20poly1305 = "0.10.1"
 ciborium = "0.2.1"
 cfg-if = "1.0"
 chrono = { version = "0.4", features = [ "serde" ] }
-clap = { version = "4.4", features = ["derive", "env", "wrap_help"] }
+clap = { version = "4.4", features = ["cargo", "derive", "env", "wrap_help"] }
 cookie = "0.18"
 criterion = { version = "0.5.1", features = [ "async_tokio" ] }
 crossbeam = "0.8"

diff --git a/illumos-utils/src/ipadm.rs b/illumos-utils/src/ipadm.rs
new file mode 100644
index 0000000000..f4d884d452
--- /dev/null
+++ b/illumos-utils/src/ipadm.rs
@@ -0,0 +1,110 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Utilities for managing IP interfaces.
+
+use crate::zone::IPADM;
+use crate::{execute, ExecutionError, PFEXEC};
+use std::net::Ipv6Addr;
+
+/// Wraps commands for interacting with interfaces.
+pub struct Ipadm {}
+
+#[cfg_attr(any(test, feature = "testing"), mockall::automock)]
+impl Ipadm {
+    // Remove current IP interface and create a new temporary one.
+    pub fn set_temp_interface_for_datalink(
+        datalink: &str,
+    ) -> Result<(), ExecutionError> {
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let cmd = cmd.args(&[IPADM, "delete-if", datalink]);
+        // First we remove the IP interface if it already exists. If it
+        // doesn't exist and the command returns an error, we continue
+        // anyway, as the next step is to create it.
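+        // (This mirrors the `ipadm delete-if "$DATALINK" || true` step
+        // that each zone's method script performed before this change.)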
+        let _ = execute(cmd);
+
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let cmd = cmd.args(&[IPADM, "create-if", "-t", datalink]);
+        execute(cmd)?;
+        Ok(())
+    }
+
+    // Set MTU to 9000 on both IPv4 and IPv6
+    pub fn set_interface_mtu(datalink: &str) -> Result<(), ExecutionError> {
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let cmd = cmd.args(&[
+            IPADM,
+            "set-ifprop",
+            "-t",
+            "-p",
+            "mtu=9000",
+            "-m",
+            "ipv4",
+            datalink,
+        ]);
+        execute(cmd)?;
+
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let cmd = cmd.args(&[
+            IPADM,
+            "set-ifprop",
+            "-t",
+            "-p",
+            "mtu=9000",
+            "-m",
+            "ipv6",
+            datalink,
+        ]);
+        execute(cmd)?;
+        Ok(())
+    }
+
+    pub fn create_static_and_autoconfigured_addrs(
+        datalink: &str,
+        listen_addr: &Ipv6Addr,
+    ) -> Result<(), ExecutionError> {
+        // Create auto-configured address on the IP interface if it doesn't already exist
+        let addrobj = format!("{}/ll", datalink);
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let cmd = cmd.args(&[IPADM, "show-addr", &addrobj]);
+        match execute(cmd) {
+            Err(_) => {
+                let mut cmd = std::process::Command::new(PFEXEC);
+                let cmd = cmd.args(&[
+                    IPADM,
+                    "create-addr",
+                    "-t",
+                    "-T",
+                    "addrconf",
+                    &addrobj,
+                ]);
+                execute(cmd)?;
+            }
+            Ok(_) => (),
+        };
+
+        // Create static address on the IP interface if it doesn't already exist
+        let addrobj = format!("{}/omicron6", datalink);
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let cmd = cmd.args(&[IPADM, "show-addr", &addrobj]);
+        match execute(cmd) {
+            Err(_) => {
+                let mut cmd = std::process::Command::new(PFEXEC);
+                let cmd = cmd.args(&[
+                    IPADM,
+                    "create-addr",
+                    "-t",
+                    "-T",
+                    "static",
+                    "-a",
+                    &listen_addr.to_string(),
+                    &addrobj,
+                ]);
+                execute(cmd)?;
+            }
+            Ok(_) => (),
+        };
+        Ok(())
+    }
+}

diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs
index 1faa4c5c37..550170b0f2 100644
--- a/illumos-utils/src/lib.rs
+++ b/illumos-utils/src/lib.rs
@@ -16,9 +16,11 @@ pub mod dkio;
 pub mod dladm;
 pub mod dumpadm;
 pub mod fstyp;
+pub mod ipadm;
 pub mod libc;
 pub mod link;
 pub mod opte;
+pub mod route;
 pub mod running_zone;
 pub mod scf;
 pub mod svc;
@@ -70,7 +72,7 @@ pub enum ExecutionError {
 mod inner {
     use super::*;

-    fn to_string(command: &mut std::process::Command) -> String {
+    pub fn to_string(command: &mut std::process::Command) -> String {
         command
             .get_args()
             .map(|s| s.to_string_lossy().into())

diff --git a/illumos-utils/src/route.rs b/illumos-utils/src/route.rs
new file mode 100644
index 0000000000..2b6af9a9fd
--- /dev/null
+++ b/illumos-utils/src/route.rs
@@ -0,0 +1,59 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Utilities for manipulating the routing tables.
+
+use crate::zone::ROUTE;
+use crate::{execute, inner, output_to_exec_error, ExecutionError, PFEXEC};
+use libc::ESRCH;
+use std::net::Ipv6Addr;
+
+/// Wraps commands for interacting with routing tables.
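+/// (Currently this only covers ensuring an IPv6 default route, replacing the
+/// `route get -inet6 default ... || route add ...` line from the old zone
+/// method scripts.)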
+pub struct Route {}
+
+#[cfg_attr(any(test, feature = "testing"), mockall::automock)]
+impl Route {
+    pub fn ensure_default_route_with_gateway(
+        gateway: &Ipv6Addr,
+    ) -> Result<(), ExecutionError> {
+        // Add the desired route if it doesn't already exist
+        let destination = "default";
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let cmd = cmd.args(&[
+            ROUTE,
+            "-n",
+            "get",
+            "-inet6",
+            destination,
+            "-inet6",
+            &gateway.to_string(),
+        ]);
+
+        let out =
+            cmd.output().map_err(|err| ExecutionError::ExecutionStart {
+                command: inner::to_string(cmd),
+                err,
+            })?;
+        match out.status.code() {
+            Some(0) => (),
+            // If the entry is not found in the table,
+            // the exit status of the command will be 3 (ESRCH).
+            // When that is the case, we'll add the route.
+            Some(ESRCH) => {
+                let mut cmd = std::process::Command::new(PFEXEC);
+                let cmd = cmd.args(&[
+                    ROUTE,
+                    "add",
+                    "-inet6",
+                    destination,
+                    "-inet6",
+                    &gateway.to_string(),
+                ]);
+                execute(cmd)?;
+            }
+            Some(_) | None => return Err(output_to_exec_error(cmd, &out)),
+        };
+        Ok(())
+    }
+}

diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs
index a3f73b3954..3f749fc352 100644
--- a/illumos-utils/src/zone.rs
+++ b/illumos-utils/src/zone.rs
@@ -22,6 +22,7 @@ pub const IPADM: &str = "/usr/sbin/ipadm";
 pub const SVCADM: &str = "/usr/sbin/svcadm";
 pub const SVCCFG: &str = "/usr/sbin/svccfg";
 pub const ZLOGIN: &str = "/usr/sbin/zlogin";
+pub const ROUTE: &str = "/usr/sbin/route";

 // TODO: These could become enums
 pub const ZONE_PREFIX: &str = "oxz_";

diff --git a/package-manifest.toml b/package-manifest.toml
index 406b53c97e..16f8f70c73 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -132,7 +132,7 @@ output.type = "zone"
 service_name = "clickhouse"
 only_for_targets.image = "standard"
 source.type = "composite"
-source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz" ]
+source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz" ]
 output.type = "zone"

 [package.clickhouse_svc]
@@ -153,7 +153,7 @@ setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary binaries"
 service_name = "clickhouse_keeper"
 only_for_targets.image = "standard"
 source.type = "composite"
-source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz" ]
+source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz" ]
 output.type = "zone"

 [package.clickhouse_keeper_svc]
@@ -174,7 +174,7 @@ setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary binaries"
 service_name = "cockroachdb"
 only_for_targets.image = "standard"
 source.type = "composite"
-source.packages = [ "cockroachdb-service.tar.gz", "internal-dns-cli.tar.gz" ]
+source.packages = [ "cockroachdb-service.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz" ]
 output.type = "zone"

 [package.cockroachdb-service]
@@ -613,3 +613,15 @@ source.packages = [ "sp-sim-softnpu.tar.gz" ]
 output.type = "zone"
+
+[package.zone-network-setup]
+service_name = "zone-network-setup"
+only_for_targets.image = "standard"
+source.type = "local"
+source.rust.binary_names = ["zone-networking"]
+source.rust.release = true
+source.paths = [
+  { from = "smf/zone-network-setup/manifest.xml", to = "/var/svc/manifest/site/zone-network-setup/manifest.xml" },
+]
+output.type = "zone"
+output.intermediate_only = true

diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index e240fb4d03..adabe80807 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -1419,6 +1419,25 @@ impl ServiceManager {
             .add_instance(ServiceInstanceBuilder::new("default")))
     }

+    fn zone_network_setup_install(
+        info: &SledAgentInfo,
+        zone: &InstalledZone,
+        static_addr: &String,
+    ) -> Result<ServiceBuilder, Error> {
+        let datalink = zone.get_control_vnic_name();
+        let gateway = &info.underlay_address.to_string();
+
+        let mut config_builder = PropertyGroupBuilder::new("config");
+        config_builder = config_builder
+            .add_property("datalink", "astring", datalink)
+            .add_property("gateway", "astring", gateway)
+            .add_property("static_addr", "astring", static_addr);
+
+        Ok(ServiceBuilder::new("oxide/zone-network-setup")
+            .add_property_group(config_builder)
+            .add_instance(ServiceInstanceBuilder::new("default")))
+    }
+
     async fn initialize_zone(
         &self,
         request: ZoneArgs<'_>,
@@ -1532,16 +1551,18 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };

-                let dns_service = Self::dns_install(info).await?;
-
-                let datalink = installed_zone.get_control_vnic_name();
-                let gateway = &info.underlay_address.to_string();
                 let listen_addr = &underlay_address.to_string();
                 let listen_port = &CLICKHOUSE_PORT.to_string();

+                let nw_setup_service = Self::zone_network_setup_install(
+                    info,
+                    &installed_zone,
+                    listen_addr,
+                )?;
+
+                let dns_service = Self::dns_install(info).await?;
+
                 let config = PropertyGroupBuilder::new("config")
-                    .add_property("datalink", "astring", datalink)
-                    .add_property("gateway", "astring", gateway)
                     .add_property("listen_addr", "astring", listen_addr)
                     .add_property("listen_port", "astring", listen_port)
                     .add_property("store", "astring", "/data");
@@ -1552,6 +1573,7 @@
                 );

                 let profile = ProfileBuilder::new("omicron")
+                    .add_service(nw_setup_service)
                     .add_service(disabled_ssh_service)
                     .add_service(clickhouse_service)
                     .add_service(dns_service);
@@ -1577,16 +1599,18 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };

-                let dns_service = Self::dns_install(info).await?;
-
-                let datalink = installed_zone.get_control_vnic_name();
-                let gateway = &info.underlay_address.to_string();
                 let listen_addr = &underlay_address.to_string();
                 let listen_port = &CLICKHOUSE_KEEPER_PORT.to_string();

+                let nw_setup_service = Self::zone_network_setup_install(
+                    info,
+                    &installed_zone,
+                    listen_addr,
+                )?;
+
+                let dns_service = Self::dns_install(info).await?;
+
                 let config = PropertyGroupBuilder::new("config")
-                    .add_property("datalink", "astring", datalink)
-                    .add_property("gateway", "astring", gateway)
                     .add_property("listen_addr", "astring", listen_addr)
                     .add_property("listen_port", "astring", listen_port)
                     .add_property("store", "astring", "/data");
@@ -1597,6 +1621,7 @@
                         .add_property_group(config),
                 );
                 let profile = ProfileBuilder::new("omicron")
+                    .add_service(nw_setup_service)
                     .add_service(disabled_ssh_service)
                     .add_service(clickhouse_keeper_service)
                     .add_service(dns_service);
@@ -1625,11 +1650,6 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };

-                let dns_service = Self::dns_install(info).await?;
-
-                // Configure the CockroachDB service.
-                let datalink = installed_zone.get_control_vnic_name();
-                let gateway = &info.underlay_address.to_string();
                 let address = SocketAddr::new(
                     IpAddr::V6(*underlay_address),
                     COCKROACH_PORT,
                 );
                 let listen_addr = &address.ip().to_string();
                 let listen_port = &address.port().to_string();

+                let nw_setup_service = Self::zone_network_setup_install(
+                    info,
+                    &installed_zone,
+                    listen_addr,
+                )?;
+
+                let dns_service = Self::dns_install(info).await?;
+
+                // Configure the CockroachDB service.
                 let cockroachdb_config = PropertyGroupBuilder::new("config")
-                    .add_property("datalink", "astring", datalink)
-                    .add_property("gateway", "astring", gateway)
                     .add_property("listen_addr", "astring", listen_addr)
                     .add_property("listen_port", "astring", listen_port)
                     .add_property("store", "astring", "/data");
@@ -1650,6 +1677,7 @@
                 );

                 let profile = ProfileBuilder::new("omicron")
+                    .add_service(nw_setup_service)
                     .add_service(disabled_ssh_service)
                     .add_service(cockroachdb_service)
                     .add_service(dns_service);

diff --git a/smf/clickhouse/manifest.xml b/smf/clickhouse/manifest.xml
index bf8d0d7e8a..5d227f1b28 100644
--- a/smf/clickhouse/manifest.xml
+++ b/smf/clickhouse/manifest.xml
@@ -11,14 +11,17 @@
 [XML hunk illegible in this extraction; the markup was stripped. Per the rest
 of this patch, it adds a dependency on the new svc:/oxide/zone-network-setup
 service and removes the datalink/gateway config properties from this
 manifest.]

diff --git a/smf/clickhouse/method_script.sh b/smf/clickhouse/method_script.sh
index 3cc8c585ad..224d759cf3 100755
--- a/smf/clickhouse/method_script.sh
+++ b/smf/clickhouse/method_script.sh
@@ -9,24 +9,6 @@ set -o pipefail
 LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")"
 LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")"
 DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")"
-DATALINK="$(svcprop -c -p config/datalink "${SMF_FMRI}")"
-GATEWAY="$(svcprop -c -p config/gateway "${SMF_FMRI}")"
-
-if [[ $DATALINK == unknown ]] || [[ $GATEWAY == unknown ]]; then
-    printf 'ERROR: missing datalink or gateway\n' >&2
-    exit "$SMF_EXIT_ERR_CONFIG"
-fi
-
-# TODO remove when https://github.com/oxidecomputer/stlouis/issues/435 is addressed
-ipadm delete-if "$DATALINK" || true
-ipadm create-if -t "$DATALINK"
-
-ipadm set-ifprop -t -p mtu=9000 -m ipv4 "$DATALINK"
-ipadm set-ifprop -t -p mtu=9000 -m ipv6 "$DATALINK"
-
-ipadm show-addr "$DATALINK/ll" || ipadm create-addr -t -T addrconf "$DATALINK/ll"
-ipadm show-addr "$DATALINK/omicron6" || ipadm create-addr -t -T static -a "$LISTEN_ADDR" "$DATALINK/omicron6"
-route get -inet6 default -inet6 "$GATEWAY" || route add -inet6 default -inet6 "$GATEWAY"

 # TEMPORARY: Racks will be set up with single node ClickHouse until
 # Nexus provisions services so there is no divergence between racks

diff --git a/smf/clickhouse_keeper/manifest.xml b/smf/clickhouse_keeper/manifest.xml
index 9e79cc131c..fc11e3dfd5 100644
--- a/smf/clickhouse_keeper/manifest.xml
+++ b/smf/clickhouse_keeper/manifest.xml
@@ -11,14 +11,17 @@
 [XML hunk illegible in this extraction; the markup was stripped. Per the rest
 of this patch, it adds a dependency on the new svc:/oxide/zone-network-setup
 service and removes the datalink/gateway config properties from this
 manifest.]

diff --git a/smf/clickhouse_keeper/method_script.sh b/smf/clickhouse_keeper/method_script.sh
index 0e785f2aec..8499e0001f 100755
--- a/smf/clickhouse_keeper/method_script.sh
+++ b/smf/clickhouse_keeper/method_script.sh
@@ -9,24 +9,6 @@ set -o pipefail
 LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")"
 LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")"
 DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")"
-DATALINK="$(svcprop -c -p config/datalink "${SMF_FMRI}")"
-GATEWAY="$(svcprop -c -p config/gateway "${SMF_FMRI}")"
-
-if [[ $DATALINK == unknown ]] || [[ $GATEWAY == unknown ]]; then
-    printf 'ERROR: missing datalink or gateway\n' >&2
"$SMF_EXIT_ERR_CONFIG" -fi - -# TODO remove when https://github.com/oxidecomputer/stlouis/issues/435 is addressed -ipadm delete-if "$DATALINK" || true -ipadm create-if -t "$DATALINK" - -ipadm set-ifprop -t -p mtu=9000 -m ipv4 "$DATALINK" -ipadm set-ifprop -t -p mtu=9000 -m ipv6 "$DATALINK" - -ipadm show-addr "$DATALINK/ll" || ipadm create-addr -t -T addrconf "$DATALINK/ll" -ipadm show-addr "$DATALINK/omicron6" || ipadm create-addr -t -T static -a "$LISTEN_ADDR" "$DATALINK/omicron6" -route get -inet6 default -inet6 "$GATEWAY" || route add -inet6 default -inet6 "$GATEWAY" # Retrieve hostnames (SRV records in internal DNS) of all keeper nodes. K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" diff --git a/smf/cockroachdb/manifest.xml b/smf/cockroachdb/manifest.xml index b4e69f6376..3a9b1a7cb8 100644 --- a/smf/cockroachdb/manifest.xml +++ b/smf/cockroachdb/manifest.xml @@ -11,6 +11,11 @@ + + + + @@ -23,8 +28,6 @@ - - diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh index e5ab4e8eaa..e8b02eb1eb 100755 --- a/smf/cockroachdb/method_script.sh +++ b/smf/cockroachdb/method_script.sh @@ -9,24 +9,6 @@ set -o pipefail LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" -DATALINK="$(svcprop -c -p config/datalink "${SMF_FMRI}")" -GATEWAY="$(svcprop -c -p config/gateway "${SMF_FMRI}")" - -if [[ $DATALINK == unknown ]] || [[ $GATEWAY == unknown ]]; then - printf 'ERROR: missing datalink or gateway\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" -fi - -# TODO remove when https://github.com/oxidecomputer/stlouis/issues/435 is addressed -ipadm delete-if "$DATALINK" || true -ipadm create-if -t "$DATALINK" - -ipadm set-ifprop -t -p mtu=9000 -m ipv4 "$DATALINK" -ipadm set-ifprop -t -p mtu=9000 -m ipv6 "$DATALINK" - -ipadm show-addr "$DATALINK/ll" || ipadm create-addr -t -T addrconf "$DATALINK/ll" -ipadm show-addr "$DATALINK/omicron6" || ipadm create-addr -t -T static -a "$LISTEN_ADDR" "$DATALINK/omicron6" -route get -inet6 default -inet6 "$GATEWAY" || route add -inet6 default -inet6 "$GATEWAY" # We need to tell CockroachDB the DNS names or IP addresses of the other nodes # in the cluster. Look these up in internal DNS. 
 # Per the recommendations in [truncated in this extraction]

diff --git a/smf/zone-network-setup/manifest.xml b/smf/zone-network-setup/manifest.xml
new file mode 100644
index 0000000000..0776329749
--- /dev/null
+++ b/smf/zone-network-setup/manifest.xml
@@ -0,0 +1,46 @@
 [46-line XML manifest illegible in this extraction; the markup was stripped.
 Per the rest of this patch, it defines the svc:/oxide/zone-network-setup
 service, whose start method runs the zone-networking CLI added below using
 the datalink, gateway, and static_addr values from its "config" property
 group.]

diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 4eda5c1af4..0240b45f90 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -27,8 +27,8 @@ byteorder = { version = "1.5.0" }
 bytes = { version = "1.5.0", features = ["serde"] }
 chrono = { version = "0.4.31", features = ["alloc", "serde"] }
 cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] }
-clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] }
-clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] }
+clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] }
+clap_builder = { version = "4.4.2", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] }
 console = { version = "0.15.8" }
 const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] }
 crossbeam-epoch = { version = "0.9.15" }
@@ -130,8 +130,8 @@ byteorder = { version = "1.5.0" }
 bytes = { version = "1.5.0", features = ["serde"] }
 chrono = { version = "0.4.31", features = ["alloc", "serde"] }
 cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] }
-clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] }
-clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] }
+clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] }
+clap_builder = { version = "4.4.2", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] }
 console = { version = "0.15.8" }
 const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] }
 crossbeam-epoch = { version = "0.9.15" }

diff --git a/zone-network-setup/Cargo.toml b/zone-network-setup/Cargo.toml
new file mode 100644
index 0000000000..10eec5c554
--- /dev/null
+++ b/zone-network-setup/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "zone-network-setup"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[dependencies]
+anyhow.workspace = true
+clap.workspace = true
+illumos-utils.workspace = true
+omicron-common.workspace = true
+slog.workspace = true
+dropshot.workspace = true
+tokio.workspace = true
+omicron-workspace-hack.workspace = true

diff --git a/zone-network-setup/src/bin/zone-networking.rs b/zone-network-setup/src/bin/zone-networking.rs
new file mode 100644
index 0000000000..b955ca856a
--- /dev/null
+++ b/zone-network-setup/src/bin/zone-networking.rs
@@ -0,0 +1,92 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! CLI to set up zone networking
+
+use anyhow::anyhow;
+use clap::{arg, command};
+use illumos_utils::ipadm::Ipadm;
+use illumos_utils::route::Route;
+use omicron_common::cmd::fatal;
+use omicron_common::cmd::CmdError;
+use slog::info;
+use std::net::Ipv6Addr;
+
+fn parse_ipv6(s: &str) -> anyhow::Result<Ipv6Addr> {
+    if s == "unknown" {
+        return Err(anyhow!("ERROR: Missing input value"));
+    };
+    s.parse().map_err(|_| anyhow!("ERROR: Invalid IPv6 address"))
+}
+
+fn parse_datalink(s: &str) -> anyhow::Result<String> {
+    if s == "unknown" {
+        return Err(anyhow!("ERROR: Missing data link"));
+    };
+    s.parse().map_err(|_| anyhow!("ERROR: Invalid data link"))
+}
+
+#[tokio::main]
+async fn main() {
+    if let Err(message) = do_run().await {
+        fatal(message);
+    }
+}
+
+async fn do_run() -> Result<(), CmdError> {
+    let log = dropshot::ConfigLogging::File {
+        path: "/dev/stderr".into(),
+        level: dropshot::ConfigLoggingLevel::Info,
+        if_exists: dropshot::ConfigLoggingIfExists::Append,
+    }
+    .to_logger("zone-networking")
+    .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+
+    let matches = command!()
+        .arg(
+            arg!(
+                -d --datalink <STRING> "datalink"
+            )
+            .required(true)
+            .value_parser(parse_datalink),
+        )
+        .arg(
+            arg!(
+                -g --gateway <Ipv6Addr> "gateway"
+            )
+            .required(true)
+            .value_parser(parse_ipv6),
+        )
+        .arg(
+            arg!(
+                -s --static_addr <Ipv6Addr> "static_addr"
+            )
+            .required(true)
+            .value_parser(parse_ipv6),
+        )
+        .get_matches();
+
+    let datalink: &String = matches.get_one("datalink").unwrap();
+    let static_addr: &Ipv6Addr = matches.get_one("static_addr").unwrap();
+    let gateway: &Ipv6Addr = matches.get_one("gateway").unwrap();
+
+    // TODO: remove when https://github.com/oxidecomputer/stlouis/issues/435 is addressed
+    info!(&log, "Ensuring a temporary IP interface is created"; "data link" => ?datalink);
+    Ipadm::set_temp_interface_for_datalink(&datalink)
+        .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+
+    info!(&log, "Setting MTU to 9000 for IPv6 and IPv4"; "data link" => ?datalink);
+    Ipadm::set_interface_mtu(&datalink)
+        .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+
+    info!(&log, "Ensuring static and auto-configured addresses are set on the IP interface"; "data link" => ?datalink, "static address" => ?static_addr);
+    Ipadm::create_static_and_autoconfigured_addrs(&datalink, static_addr)
+        .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+
+    info!(&log, "Ensuring there is a default route"; "gateway" => ?gateway);
+    Route::ensure_default_route_with_gateway(gateway)
+        .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+
+    Ok(())
+}

From df67a572756e0f348526a4a86ef691e4e917c6e1 Mon Sep 17 00:00:00 2001
From: Andy Fiddaman
Date: Tue, 16 Jan 2024 09:51:10 +0000
Subject: [PATCH 180/186] Create /etc/inet/hosts as part of zone networking setup (#4802)

For self-assembly zones, create `/etc/inet/hosts` as part of configuring
the local network interfaces.
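[Editor's note: to make the effect of this patch concrete, the sketch below
renders the same template the patch writes into `/etc/inet/hosts`, for an
assumed zone name and static address; both values are made up for
illustration.]

```rust
// Renders the hosts-file template added by this patch for hypothetical
// inputs; real values come from zone::current() and the SMF config.
fn main() {
    let zonename = "oxz_cockroachdb_8bbea076"; // assumed example zone name
    let static_addr = "fd00:1122:3344:101::3"; // assumed example address
    print!(
        "\n::1 localhost loghost\n\
         127.0.0.1 localhost loghost\n\
         {static_addr} {zonename}.local {zonename}\n"
    );
}
```

Mapping the zone's own name to its underlay address lets software inside the
zone resolve its hostname without consulting external DNS.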
---
 Cargo.lock                                    |  1 +
 zone-network-setup/Cargo.toml                 |  1 +
 zone-network-setup/src/bin/zone-networking.rs | 21 ++++++++++++++++++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7db604dcb5..bcfcdc3082 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10310,6 +10310,7 @@ dependencies = [
  "omicron-workspace-hack",
  "slog",
  "tokio",
+ "zone",
 ]

 [[package]]

diff --git a/zone-network-setup/Cargo.toml b/zone-network-setup/Cargo.toml
index 10eec5c554..28854b82f7 100644
--- a/zone-network-setup/Cargo.toml
+++ b/zone-network-setup/Cargo.toml
@@ -13,3 +13,4 @@ slog.workspace = true
 dropshot.workspace = true
 tokio.workspace = true
 omicron-workspace-hack.workspace = true
+zone.workspace = true

diff --git a/zone-network-setup/src/bin/zone-networking.rs b/zone-network-setup/src/bin/zone-networking.rs
index b955ca856a..f3d18832c5 100644
--- a/zone-network-setup/src/bin/zone-networking.rs
+++ b/zone-network-setup/src/bin/zone-networking.rs
@@ -11,8 +11,11 @@ use illumos_utils::route::Route;
 use omicron_common::cmd::fatal;
 use omicron_common::cmd::CmdError;
 use slog::info;
+use std::fs;
 use std::net::Ipv6Addr;

+pub const HOSTS_FILE: &str = "/etc/inet/hosts";
+
 fn parse_ipv6(s: &str) -> anyhow::Result<Ipv6Addr> {
     if s == "unknown" {
         return Err(anyhow!("ERROR: Missing input value"));
@@ -67,11 +70,14 @@
         )
         .get_matches();

+    let zonename =
+        zone::current().await.expect("Could not determine local zone name");
     let datalink: &String = matches.get_one("datalink").unwrap();
     let static_addr: &Ipv6Addr = matches.get_one("static_addr").unwrap();
     let gateway: &Ipv6Addr = matches.get_one("gateway").unwrap();

-    // TODO: remove when https://github.com/oxidecomputer/stlouis/issues/435 is addressed
+    // TODO: remove when https://github.com/oxidecomputer/stlouis/issues/435 is
+    // addressed
     info!(&log, "Ensuring a temporary IP interface is created"; "data link" => ?datalink);
     Ipadm::set_temp_interface_for_datalink(&datalink)
         .map_err(|err| CmdError::Failure(anyhow!(err)))?;
@@ -88,5 +94,18 @@
     Route::ensure_default_route_with_gateway(gateway)
         .map_err(|err| CmdError::Failure(anyhow!(err)))?;

+    info!(&log, "Populating hosts file for zone"; "zonename" => ?zonename);
+    fs::write(
+        HOSTS_FILE,
+        format!(
+            r#"
+::1 localhost loghost
+127.0.0.1 localhost loghost
+{static_addr} {zonename}.local {zonename}
+"#
+        ),
+    )
+    .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+
     Ok(())
 }

From 146aa5fdec6c527382af26c9369227d7c76ed8f8 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Tue, 16 Jan 2024 09:11:26 -0800
Subject: [PATCH 181/186] [package] Use topological sorting from omicron-zone-package (#4816)

This functionality is now provided in `omicron-zone-package`, as of
https://github.com/oxidecomputer/omicron-package/pull/57
---
 Cargo.lock                         |  6 +--
 Cargo.toml                         |  3 +-
 package/Cargo.toml                 |  1 -
 package/src/bin/omicron-package.rs | 59 +++++------------------------
 4 files changed, 13 insertions(+), 56 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bcfcdc3082..0b4a85b234 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4933,7 +4933,6 @@ dependencies = [
  "thiserror",
  "tokio",
  "toml 0.8.8",
- "topological-sort",
  "walkdir",
 ]

@@ -5201,9 +5200,9 @@ dependencies = [

 [[package]]
 name = "omicron-zone-package"
-version = "0.9.1"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "620c53207d39a385f298444337d575690e0d9e793561d471ba7a614dc213e372"
"cdfd257b7067e7a6aa9fba896a89b0f625bac7660213bb830db36e543bd3cdb8" dependencies = [ "anyhow", "async-trait", @@ -5222,6 +5221,7 @@ dependencies = [ "thiserror", "tokio", "toml 0.7.8", + "topological-sort", "walkdir", ] diff --git a/Cargo.toml b/Cargo.toml index 515e767bfb..974bf0b1ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -261,7 +261,7 @@ omicron-package = { path = "package" } omicron-rpaths = { path = "rpaths" } omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } -omicron-zone-package = "0.9.1" +omicron-zone-package = "0.10.1" oxide-client = { path = "clients/oxide-client" } oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4", features = [ "api", "std" ] } once_cell = "1.19.0" @@ -380,7 +380,6 @@ tokio-tungstenite = "0.20" tokio-util = { version = "0.7.10", features = ["io", "io-util"] } toml = "0.8.8" toml_edit = "0.21.0" -topological-sort = "0.2.2" tough = { version = "0.16.0", features = [ "http" ] } trust-dns-client = "0.22" trust-dns-proto = "0.22" diff --git a/package/Cargo.toml b/package/Cargo.toml index 6cc0e343db..0dc86ceb8c 100644 --- a/package/Cargo.toml +++ b/package/Cargo.toml @@ -30,7 +30,6 @@ tar.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true -topological-sort.workspace = true walkdir.workspace = true omicron-workspace-hack.workspace = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 357a217fe5..59c5c6ffe6 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -145,6 +145,7 @@ async fn do_for_all_rust_packages( let (release_pkgs, debug_pkgs): (Vec<_>, _) = config .package_config .packages_to_build(&config.target) + .0 .into_iter() .filter_map(|(name, pkg)| match &pkg.source { PackageSource::Local { rust: Some(rust_pkg), .. } => { @@ -463,8 +464,6 @@ async fn get_package( } async fn do_package(config: &Config, output_directory: &Path) -> Result<()> { - use topological_sort::TopologicalSort; - create_dir_all(&output_directory) .map_err(|err| anyhow!("Cannot create output directory: {}", err))?; @@ -472,54 +471,12 @@ async fn do_package(config: &Config, output_directory: &Path) -> Result<()> { do_build(&config).await?; - let mut all_packages = config - .package_config - .packages_to_build(&config.target) - .into_iter() - .map(|(package_name, package)| { - (package.get_output_file(package_name), (package_name, package)) - }) - .collect::>(); - - let mut outputs = TopologicalSort::::new(); - for (package_output, (_, package)) in &all_packages { - match &package.source { - PackageSource::Local { .. } - | PackageSource::Prebuilt { .. } - | PackageSource::Manual => { - // Skip intermediate leaf packages; if necessary they'll be - // added to the dependency graph by whatever composite package - // actually depends on them. - if !matches!( - package.output, - PackageOutput::Zone { intermediate_only: true } - ) { - outputs.insert(package_output); - } - } - PackageSource::Composite { packages: deps } => { - for dep in deps { - outputs.add_dependency(dep, package_output); - } - } - } - } - - while !outputs.is_empty() { - let batch = outputs.pop_all(); - assert!( - !batch.is_empty() || outputs.is_empty(), - "cyclic dependency in package manifest!" 
-        );
-
-        let packages = batch.into_iter().map(|output| {
-            all_packages
-                .remove(&output)
-                .expect("package should've already been handled.")
-        });
+    let packages = config.package_config.packages_to_build(&config.target);

-        let ui_refs = vec![ui.clone(); packages.len()];
-        let pkg_stream = stream::iter(packages)
+    let package_iter = packages.build_order();
+    for batch in package_iter {
+        let ui_refs = vec![ui.clone(); batch.len()];
+        let pkg_stream = stream::iter(batch)
             .zip(stream::iter(ui_refs))
             .map(Ok::<_, anyhow::Error>)
             .try_for_each_concurrent(
@@ -553,6 +510,7 @@ async fn do_stamp(
     let (_name, package) = config
         .package_config
         .packages_to_deploy(&config.target)
+        .0
         .into_iter()
         .find(|(name, _pkg)| name.as_str() == package_name)
         .ok_or_else(|| anyhow!("Package {package_name} not found"))?;
@@ -574,7 +532,7 @@ async fn do_unpack(
     })?;

     // Copy all packages to the install location in parallel.
-    let packages = config.package_config.packages_to_deploy(&config.target);
+    let packages = config.package_config.packages_to_deploy(&config.target).0;

     packages.par_iter().try_for_each(
         |(package_name, package)| -> Result<()> {
@@ -704,6 +662,7 @@ fn uninstall_all_packages(config: &Config) {
     for (_, package) in config
         .package_config
        .packages_to_deploy(&config.target)
+        .0
         .into_iter()
         .filter(|(_, package)| matches!(package.output, PackageOutput::Tarball))
     {

From 4796057efa48d062e40c8bb8c663ac04131a9242 Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Tue, 16 Jan 2024 14:51:45 -0600
Subject: [PATCH 182/186] Fix duplicate entries in IP pools list (#4808)

Bugfix for #4261. Using `distinct` to eliminate dupes from the left outer
join works, but it's better to just use the well-known name of the service
IP pool to exclude it from whatever operations it needs to be excluded from.

Potential followup related to #4762: if the ID was well-known too, we could
do the `is_internal` check without having to hit the DB.
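[Editor's note: as background for the fix, here is a minimal stand-alone
sketch — plain Rust, not diesel — of why the old left-outer-join listing
produced duplicates: the join yields one row per (pool, silo link), so any
pool linked to multiple silos appears multiple times unless the query
deduplicates. Pool and silo names are made up.]

```rust
// Toy model of a LEFT OUTER JOIN between ip_pool and ip_pool_resource.
fn main() {
    let pools = vec!["pool1", "pool2"];
    let links = vec![("pool1", "silo1"), ("pool1", "silo2"), ("pool2", "silo1")];

    // The join produces one output row per matching link (or one row with
    // no link at all), not one row per pool.
    let mut rows = Vec::new();
    for pool in &pools {
        let matches: Vec<_> = links.iter().filter(|(p, _)| p == pool).collect();
        if matches.is_empty() {
            rows.push((*pool, None));
        } else {
            for (p, silo) in matches {
                rows.push((*p, Some(*silo)));
            }
        }
    }

    // "pool1" is linked to two silos, so it shows up twice in the listing.
    assert_eq!(rows.iter().filter(|(p, _)| *p == "pool1").count(), 2);
}
```

Filtering on the service pool's well-known name sidesteps the join entirely,
so the listing never needs `distinct`.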
---
 nexus/db-queries/src/db/datastore/ip_pool.rs | 59 ++-----
 nexus/tests/integration_tests/endpoints.rs   |  8 +-
 nexus/tests/integration_tests/ip_pools.rs    | 89 +++++++++++-----
 3 files changed, 81 insertions(+), 75 deletions(-)

diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs
index f51f54d592..c9fdb5f0ee 100644
--- a/nexus/db-queries/src/db/datastore/ip_pool.rs
+++ b/nexus/db-queries/src/db/datastore/ip_pool.rs
@@ -10,11 +10,12 @@ use crate::context::OpContext;
 use crate::db;
 use crate::db::collection_insert::AsyncInsertError;
 use crate::db::collection_insert::DatastoreCollection;
+use crate::db::datastore::SERVICE_IP_POOL_NAME;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::public_error_from_diesel_lookup;
 use crate::db::error::ErrorHandler;
-use crate::db::fixed_data::silo::INTERNAL_SILO_ID;
 use crate::db::identity::Resource;
+use crate::db::lookup::LookupPath;
 use crate::db::model::ExternalIp;
 use crate::db::model::IpKind;
 use crate::db::model::IpPool;
@@ -56,7 +57,6 @@ impl DataStore {
         pagparams: &PaginatedBy<'_>,
     ) -> ListResultVec<IpPool> {
         use db::schema::ip_pool;
-        use db::schema::ip_pool_resource;

         opctx
             .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST)
             .await?;
@@ -71,14 +71,7 @@
                 &pagparams.map_name(|n| Name::ref_cast(n)),
             ),
         }
-        .left_outer_join(ip_pool_resource::table)
-        .filter(
-            ip_pool_resource::resource_id
-                .ne(*INTERNAL_SILO_ID)
-                // resource_id is not nullable -- null here means the
-                // pool has no entry in the join table
-                .or(ip_pool_resource::resource_id.is_null()),
-        )
+        .filter(ip_pool::name.ne(SERVICE_IP_POOL_NAME))
         .filter(ip_pool::time_deleted.is_null())
         .select(IpPool::as_select())
        .get_results_async(&*self.pool_connection_authorized(opctx).await?)
@@ -225,48 +218,15 @@
         })
     }

-    /// Looks up an IP pool intended for internal services.
+    /// Look up IP pool intended for internal services by its well-known name.
     ///
     /// This method may require an index by Availability Zone in the future.
     pub async fn ip_pools_service_lookup(
         &self,
         opctx: &OpContext,
     ) -> LookupResult<(authz::IpPool, IpPool)> {
-        use db::schema::ip_pool;
-        use db::schema::ip_pool_resource;
-
-        opctx
-            .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST)
-            .await?;
-
-        // Look up IP pool by its association with the internal silo.
-        // We assume there is only one pool for that silo, or at least,
-        // if there is more than one, it doesn't matter which one we pick.
-        let (authz_pool, pool) = ip_pool::table
-            .inner_join(ip_pool_resource::table)
-            .filter(ip_pool::time_deleted.is_null())
-            .filter(
-                ip_pool_resource::resource_type
-                    .eq(IpPoolResourceType::Silo)
-                    .and(ip_pool_resource::resource_id.eq(*INTERNAL_SILO_ID)),
-            )
-            .select(IpPool::as_select())
-            .get_result_async(&*self.pool_connection_authorized(opctx).await?)
-            .await
-            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
-            .map(|ip_pool| {
-                (
-                    authz::IpPool::new(
-                        authz::FLEET,
-                        ip_pool.id(),
-                        LookupType::ByCompositeId(
-                            "Service IP Pool".to_string(),
-                        ),
-                    ),
-                    ip_pool,
-                )
-            })?;
-        Ok((authz_pool, pool))
+        let name = SERVICE_IP_POOL_NAME.parse().unwrap();
+        LookupPath::new(&opctx, self).ip_pool_name(&Name(name)).fetch().await
     }

     /// Creates a new IP pool.
@@ -374,15 +334,10 @@ impl DataStore {
         authz_pool: &authz::IpPool,
     ) -> LookupResult<bool> {
         use db::schema::ip_pool;
-        use db::schema::ip_pool_resource;

         ip_pool::table
-            .inner_join(ip_pool_resource::table)
             .filter(ip_pool::id.eq(authz_pool.id()))
-            .filter(
-                ip_pool_resource::resource_type.eq(IpPoolResourceType::Silo),
-            )
-            .filter(ip_pool_resource::resource_id.eq(*INTERNAL_SILO_ID))
+            .filter(ip_pool::name.eq(SERVICE_IP_POOL_NAME))
             .filter(ip_pool::time_deleted.is_null())
             .select(ip_pool::id)
             .first_async::<uuid::Uuid>(

diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs
index b7b838ca50..11bfa34c5f 100644
--- a/nexus/tests/integration_tests/endpoints.rs
+++ b/nexus/tests/integration_tests/endpoints.rs
@@ -1048,7 +1048,7 @@ pub static VERIFY_ENDPOINTS: Lazy<Vec<VerifyEndpoint>> = Lazy::new(|| {
         // IP Pool endpoint (Oxide services)
         VerifyEndpoint {
             url: &DEMO_IP_POOL_SERVICE_URL,
-            visibility: Visibility::Public,
+            visibility: Visibility::Protected,
             unprivileged_access: UnprivilegedAccess::None,
             allowed_methods: vec![
                 AllowedMethod::Get
@@ -1058,7 +1058,7 @@
         // IP Pool ranges endpoint (Oxide services)
         VerifyEndpoint {
             url: &DEMO_IP_POOL_SERVICE_RANGES_URL,
-            visibility: Visibility::Public,
+            visibility: Visibility::Protected,
             unprivileged_access: UnprivilegedAccess::None,
             allowed_methods: vec![
                 AllowedMethod::Get
@@ -1068,7 +1068,7 @@
         // IP Pool ranges/add endpoint (Oxide services)
         VerifyEndpoint {
             url: &DEMO_IP_POOL_SERVICE_RANGES_ADD_URL,
-            visibility: Visibility::Public,
+            visibility: Visibility::Protected,
             unprivileged_access: UnprivilegedAccess::None,
             allowed_methods: vec![
                 AllowedMethod::Post(
@@ -1080,7 +1080,7 @@
         // IP Pool ranges/delete endpoint (Oxide services)
         VerifyEndpoint {
             url: &DEMO_IP_POOL_SERVICE_RANGES_DEL_URL,
-            visibility: Visibility::Public,
+            visibility: Visibility::Protected,
             unprivileged_access: UnprivilegedAccess::None,
             allowed_methods: vec![
                 AllowedMethod::Post(

diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs
index 5682df2c3a..d97eda9a0b 100644
--- a/nexus/tests/integration_tests/ip_pools.rs
+++ b/nexus/tests/integration_tests/ip_pools.rs
@@ -17,6 +17,7 @@ use nexus_test_utils::http_testing::RequestBuilder;
 use nexus_test_utils::resource_helpers::create_instance;
 use nexus_test_utils::resource_helpers::create_ip_pool;
 use nexus_test_utils::resource_helpers::create_project;
+use nexus_test_utils::resource_helpers::create_silo;
 use nexus_test_utils::resource_helpers::link_ip_pool;
 use nexus_test_utils::resource_helpers::object_create;
 use nexus_test_utils::resource_helpers::object_create_error;
@@ -36,6 +37,7 @@ use nexus_types::external_api::params::IpPoolUpdate;
 use nexus_types::external_api::shared::IpRange;
 use nexus_types::external_api::shared::Ipv4Range;
 use nexus_types::external_api::shared::Ipv6Range;
+use nexus_types::external_api::shared::SiloIdentityMode;
 use nexus_types::external_api::views::IpPool;
 use nexus_types::external_api::views::IpPoolRange;
 use nexus_types::external_api::views::IpPoolSilo;
@@ -43,6 +45,7 @@ use nexus_types::external_api::views::Silo;
 use nexus_types::identity::Resource;
 use omicron_common::api::external::IdentityMetadataUpdateParams;
 use omicron_common::api::external::NameOrId;
+use omicron_common::api::external::SimpleIdentity;
 use omicron_common::api::external::{IdentityMetadataCreateParams, Name};
 use omicron_nexus::TestInterfaces;
 use sled_agent_client::TestInterfaces as SledTestInterfaces;
@@ -62,16 +65,7 @@ async fn test_ip_pool_basic_crud(cptestctx: &ControlPlaneTestContext) {
     let ip_pool_ranges_url = format!("{}/ranges", ip_pool_url);
     let ip_pool_add_range_url = format!("{}/add", ip_pool_ranges_url);

-    // Verify the list of IP pools is empty
-    let ip_pools = NexusRequest::iter_collection_authn::<IpPool>(
-        client,
-        ip_pools_url,
-        "",
-        None,
-    )
-    .await
-    .expect("Failed to list IP Pools")
-    .all_items;
+    let ip_pools = get_ip_pools(&client).await;
     assert_eq!(ip_pools.len(), 0, "Expected empty list of IP pools");

     // Verify 404 if the pool doesn't exist yet, both for creating or deleting
@@ -102,15 +96,7 @@
     assert_eq!(created_pool.identity.name, pool_name);
     assert_eq!(created_pool.identity.description, description);

-    let list = NexusRequest::iter_collection_authn::<IpPool>(
-        client,
-        ip_pools_url,
-        "",
-        None,
-    )
-    .await
-    .expect("Failed to list IP Pools")
-    .all_items;
+    let list = get_ip_pools(client).await;
     assert_eq!(list.len(), 1, "Expected exactly 1 IP pool");
     assert_pools_eq(&created_pool, &list[0]);

@@ -212,6 +198,71 @@
         .expect("Expected to be able to delete an empty IP Pool");
 }

+async fn get_ip_pools(client: &ClientTestContext) -> Vec<IpPool> {
+    NexusRequest::iter_collection_authn::<IpPool>(
+        client,
+        "/v1/system/ip-pools",
+        "",
+        None,
+    )
+    .await
+    .expect("Failed to list IP Pools")
+    .all_items
+}
+
+// this test exists primarily because of a bug in the initial implementation
+// where we included a duplicate of each pool in the list response for every
+// associated silo
+#[nexus_test]
+async fn test_ip_pool_list_dedupe(cptestctx: &ControlPlaneTestContext) {
+    let client = &cptestctx.external_client;
+
+    let ip_pools = get_ip_pools(&client).await;
+    assert_eq!(ip_pools.len(), 0);
+
+    let range1 = IpRange::V4(
+        Ipv4Range::new(
+            std::net::Ipv4Addr::new(10, 0, 0, 51),
+            std::net::Ipv4Addr::new(10, 0, 0, 52),
+        )
+        .unwrap(),
+    );
+    let (pool1, ..) = create_ip_pool(client, "pool1", Some(range1)).await;
+    let range2 = IpRange::V4(
+        Ipv4Range::new(
+            std::net::Ipv4Addr::new(10, 0, 0, 53),
+            std::net::Ipv4Addr::new(10, 0, 0, 54),
+        )
+        .unwrap(),
+    );
+    let (pool2, ..) = create_ip_pool(client, "pool2", Some(range2)).await;
+
+    let ip_pools = get_ip_pools(&client).await;
+    assert_eq!(ip_pools.len(), 2);
+    assert_eq!(ip_pools[0].identity.id, pool1.id());
+    assert_eq!(ip_pools[1].identity.id, pool2.id());
+
+    // create 3 silos and link
+    let silo1 =
+        create_silo(&client, "silo1", true, SiloIdentityMode::SamlJit).await;
+    link_ip_pool(client, "pool1", &silo1.id(), false).await;
+    // linking pool2 here only, just for variety
+    link_ip_pool(client, "pool2", &silo1.id(), false).await;
+
+    let silo2 =
+        create_silo(&client, "silo2", true, SiloIdentityMode::SamlJit).await;
+    link_ip_pool(client, "pool1", &silo2.id(), true).await;
+
+    let silo3 =
+        create_silo(&client, "silo3", true, SiloIdentityMode::SamlJit).await;
+    link_ip_pool(client, "pool1", &silo3.id(), true).await;
+
+    let ip_pools = get_ip_pools(&client).await;
+    assert_eq!(ip_pools.len(), 2);
+    assert_eq!(ip_pools[0].identity.id, pool1.id());
+    assert_eq!(ip_pools[1].identity.id, pool2.id());
+}
+
 /// The internal IP pool, defined by its association with the internal silo,
 /// cannot be interacted with through the operator API. CRUD operations should
 /// all 404 except fetch by name or ID.
From 98b4b194bdee287f1980a21f19e3c96de7a4dd50 Mon Sep 17 00:00:00 2001
From: Ryan Goodfellow
Date: Tue, 16 Jan 2024 13:45:25 -0800
Subject: [PATCH 183/186] don't enable chrony in the gz, it runs in the ntp zone (#4821)

---
 tools/install_runner_prerequisites.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/install_runner_prerequisites.sh b/tools/install_runner_prerequisites.sh
index 42347f518d..2a29e97085 100755
--- a/tools/install_runner_prerequisites.sh
+++ b/tools/install_runner_prerequisites.sh
@@ -120,8 +120,6 @@ function install_packages {
       exit "$rc"
     fi

-    pfexec svcadm enable chrony
-
     pkg list -v "${packages[@]}"
   elif [[ "${HOST_OS}" == "Linux" ]]; then
     packages=(

From 133a76b0ae08bda4f99648ebec6054f6e53f4a6d Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Tue, 16 Jan 2024 14:00:17 -0800
Subject: [PATCH 184/186] Update Rust crate rcgen to 0.12.0 (#4815)

---
 Cargo.lock | 6 +++---
 Cargo.toml | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0b4a85b234..fd42c987ac 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6571,12 +6571,12 @@ dependencies = [

 [[package]]
 name = "rcgen"
-version = "0.11.3"
+version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52c4f3084aa3bc7dfbba4eff4fab2a54db4324965d8872ab933565e6fbd83bc6"
+checksum = "5d918c80c5a4c7560db726763020bd16db179e4d5b828078842274a443addb5d"
 dependencies = [
  "pem",
- "ring 0.16.20",
+ "ring 0.17.7",
  "time",
  "yasna",
 ]

diff --git a/Cargo.toml b/Cargo.toml
index 974bf0b1ac..c3e59c901e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -305,7 +305,7 @@ quote = "1.0"
 rand = "0.8.5"
 ratatui = "0.23.0"
 rayon = "1.8"
-rcgen = "0.11.3"
+rcgen = "0.12.0"
 reedline = "0.22.0"
 ref-cast = "1.0"
 regex = "1.10.2"

From c20fa5a6631a82167fe967a06e638e2cca7d238b Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Tue, 16 Jan 2024 14:15:27 -0800
Subject: [PATCH 185/186] Co-locate 'data' datasets with 'zone' datasets (#4820)

Fixes https://github.com/oxidecomputer/omicron/issues/4819
---
 sled-agent/src/services.rs | 53 ++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index adabe80807..c068515d14 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -2887,12 +2887,9 @@ impl ServiceManager {
         }

         // Create zones that should be running
-        let all_u2_roots = self
-            .inner
-            .storage
-            .get_latest_resources()
-            .await
-            .all_u2_mountpoints(ZONE_DATASET);
+        let storage = self.inner.storage.get_latest_resources().await;
+        let all_u2_pools = storage.all_u2_zpools();
+
         let mut new_zones = Vec::new();
         for zone in zones_to_be_added {
             // Check if we think the zone should already be running
@@ -2926,17 +2923,41 @@
                 }
             }

-            // For each new zone request, we pick an arbitrary U.2 to store
-            // the zone filesystem. Note: This isn't known to Nexus right now,
-            // so it's a local-to-sled decision.
+            // For each new zone request, we pick a U.2 to store the zone
+            // filesystem. Note: This isn't known to Nexus right now, so it's a
+            // local-to-sled decision.
             //
-            // This is (currently) intentional, as the zone filesystem should
-            // be destroyed between reboots.
-            let mut rng = rand::thread_rng();
-            let root = all_u2_roots
-                .choose(&mut rng)
-                .ok_or_else(|| Error::U2NotFound)?
-                .clone();
+            // Currently, the zone filesystem should be destroyed between
+            // reboots, so it's fine to make this decision locally.
+            let root = if let Some(dataset) = zone.dataset_name() {
+                // If the zone happens to already manage a dataset, then
+                // we co-locate the zone dataset on the same zpool.
+                //
+                // This slightly reduces the underlying fault domain for the
+                // service.
+                let data_pool = dataset.pool();
+                if !all_u2_pools.contains(&data_pool) {
+                    warn!(
+                        log,
+                        "zone dataset requested on a zpool which doesn't exist";
+                        "zone" => &name,
+                        "zpool" => %data_pool
+                    );
+                    return Err(Error::MissingDevice {
+                        device: format!("zpool: {data_pool}"),
+                    });
+                }
+                data_pool.dataset_mountpoint(ZONE_DATASET)
+            } else {
+                // If the zone is not coupled to other datasets, we pick one
+                // arbitrarily.
+                let mut rng = rand::thread_rng();
+                all_u2_pools
+                    .choose(&mut rng)
+                    .map(|pool| pool.dataset_mountpoint(ZONE_DATASET))
+                    .ok_or_else(|| Error::U2NotFound)?
+                    .clone()
+            };

             new_zones.push(OmicronZoneConfigLocal { zone: zone.clone(), root });
         }

From d23d2fc30573ea451f7c9f3cc5dc9a4e8cc2e317 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Tue, 16 Jan 2024 14:31:07 -0800
Subject: [PATCH 186/186] Update actions/setup-node action to v4.0.1 (#4774)

---
 .github/workflows/validate-openapi-spec.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml
index 10f1dd5b46..a76567af2a 100644
--- a/.github/workflows/validate-openapi-spec.yml
+++ b/.github/workflows/validate-openapi-spec.yml
@@ -13,7 +13,7 @@ jobs:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
         with:
           ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461
      - uses: actions/setup-node@8f152de45cc393bb48ce5d89d36b731f54556e65 # v4.0.0
+      - uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4.0.1
        with:
          node-version: '18'
      - name: Install our tools