From 81842f6803fae98e713f996600c6dd5ccb0add41 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 12 Jan 2024 14:51:05 -0800 Subject: [PATCH] cleanup, programmatic diff, fix up test --- Cargo.lock | 2 - clients/nexus-client/src/lib.rs | 24 - clients/sled-agent-client/src/lib.rs | 1 - nexus/deployment/Cargo.toml | 2 - nexus/deployment/src/blueprint_builder.rs | 6 +- nexus/deployment/src/planner.rs | 97 ++-- .../deployment/tests/output/planner_basic.out | 108 ---- nexus/src/app/deployment.rs | 4 +- nexus/src/internal_api/http_entrypoints.rs | 46 +- nexus/types/src/deployment.rs | 502 +++++++++++------- openapi/nexus-internal.json | 18 +- 11 files changed, 410 insertions(+), 400 deletions(-) delete mode 100644 nexus/deployment/tests/output/planner_basic.out diff --git a/Cargo.lock b/Cargo.lock index ebed8518a4..485eb1b061 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4221,7 +4221,6 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "expectorate", "internal-dns", "ipnet", "ipnetwork", @@ -4230,7 +4229,6 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "rand 0.8.5", "sled-agent-client", "slog", "thiserror", diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 30dfc8c712..1e1cbc31e7 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -36,30 +36,6 @@ progenitor::generate_api!( } ); -impl types::OmicronZoneType { - /// Human-readable label describing what kind of zone this is - /// - /// This is just use for testing and reporting. - /// Note that this function is identical to its analog in sled-agent-client. - pub fn label(&self) -> impl std::fmt::Display { - match self { - types::OmicronZoneType::BoundaryNtp { .. } => "boundary_ntp", - types::OmicronZoneType::Clickhouse { .. } => "clickhouse", - types::OmicronZoneType::ClickhouseKeeper { .. } => { - "clickhouse_keeper" - } - types::OmicronZoneType::CockroachDb { .. } => "cockroach_db", - types::OmicronZoneType::Crucible { .. } => "crucible", - types::OmicronZoneType::CruciblePantry { .. } => "crucible_pantry", - types::OmicronZoneType::ExternalDns { .. } => "external_dns", - types::OmicronZoneType::InternalDns { .. } => "internal_dns", - types::OmicronZoneType::InternalNtp { .. } => "internal_ntp", - types::OmicronZoneType::Nexus { .. } => "nexus", - types::OmicronZoneType::Oximeter { .. } => "oximeter", - } - } -} - impl omicron_common::api::external::ClientError for types::Error { fn message(&self) -> String { self.message.clone() diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 428dcf3645..610ea5a510 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -51,7 +51,6 @@ impl types::OmicronZoneType { /// Human-readable label describing what kind of zone this is /// /// This is just use for testing and reporting. - /// Note that this function is identical to its analog in nexus-client. pub fn label(&self) -> impl std::fmt::Display { match self { types::OmicronZoneType::BoundaryNtp { .. } => "boundary_ntp", diff --git a/nexus/deployment/Cargo.toml b/nexus/deployment/Cargo.toml index e6c33b5efd..b166f947bf 100644 --- a/nexus/deployment/Cargo.toml +++ b/nexus/deployment/Cargo.toml @@ -18,8 +18,6 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] -expectorate.workspace = true nexus-inventory.workspace = true omicron-test-utils.workspace = true -rand.workspace = true sled-agent-client.workspace = true diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index dd43e522a4..4396221d50 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -334,8 +334,12 @@ impl<'a> BlueprintBuilder<'a> { zones: old_sled_zones.zones.clone(), } } else { + // The first generation is reserved to mean the one + // containing no zones. See + // OMICRON_ZONES_CONFIG_INITIAL_GENERATION. So we start + // with the next one. OmicronZonesConfig { - generation: Generation::new(), + generation: Generation::new().next(), zones: vec![], } } diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index d79e5d4b1b..ba1f29c42b 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -111,15 +111,12 @@ mod test { use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_test_utils::dev::test_setup_log; - use rand::Fill; - use rand::SeedableRng; use sled_agent_client::types::{ Baseboard, Inventory, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, OmicronZonesConfig, SledRole, }; use std::collections::BTreeMap; use std::collections::BTreeSet; - use std::fmt::Write; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::str::FromStr; @@ -150,20 +147,6 @@ mod test { fn example() -> (Collection, Policy) { let mut builder = nexus_inventory::CollectionBuilder::new("test-suite"); - // We want deterministic uuid generation in this test so that we get - // consistent output for expectorate. - // XXX-dap this is not quite good enough because the blueprint ids and - // the ids for new sleds and zones are generated elsewhere with - // Uuid::new_v4(). What to do? Pass the RNG around everywhere? - // Find/replace these uuids in the output? Add a programmatic interface - // to the differ? - let mut rng = rand::rngs::StdRng::from_seed(Default::default()); - fn new_uuid(r: &mut rand::rngs::StdRng) -> Uuid { - let mut bytes = uuid::Bytes::default(); - bytes.try_fill(r).unwrap(); - uuid::Builder::from_random_bytes(bytes).into_uuid() - } - let sled_ids = [ "72443b6c-b8bb-4ffa-ab3a-aeaa428ed79b", "a5f3db3a-61aa-4f90-ad3e-02833c253bf5", @@ -199,7 +182,7 @@ mod test { let zpools = &policy.sleds.get(&sled_id).unwrap().zpools; let ip1 = sled_ip.saturating_add(1); let zones: Vec<_> = std::iter::once(OmicronZoneConfig { - id: new_uuid(&mut rng), + id: Uuid::new_v4(), underlay_address: sled_ip.saturating_add(1), zone_type: OmicronZoneType::InternalNtp { address: SocketAddrV6::new(ip1, 12345, 0, 0).to_string(), @@ -211,7 +194,7 @@ mod test { .chain(zpools.iter().enumerate().map(|(i, zpool_name)| { let ip = sled_ip.saturating_add(u128::try_from(i + 2).unwrap()); OmicronZoneConfig { - id: new_uuid(&mut rng), + id: Uuid::new_v4(), underlay_address: ip, zone_type: OmicronZoneType::Crucible { address: String::from("[::1]:12345"), @@ -245,9 +228,6 @@ mod test { fn test_basic_add_sled() { let logctx = test_setup_log("planner_basic_add_sled"); - // Assemble an output string that we'll check at the end. - let mut out = String::new(); - // Use our example inventory collection. let (collection, mut policy) = example(); @@ -273,12 +253,11 @@ mod test { .plan() .expect("failed to plan"); - writeln!( - &mut out, - "1 -> 2 (expected no changes):\n{}", - blueprint1.diff(&blueprint2) - ) - .unwrap(); + let diff = blueprint1.diff(&blueprint2); + println!("1 -> 2 (expected no changes):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); // Now add a new sled. let new_sled_id = @@ -295,12 +274,22 @@ mod test { .plan() .expect("failed to plan"); - writeln!( - &mut out, - "2 -> 3 (expect new NTP zone on new sled):\n{}", - blueprint2.diff(&blueprint3) - ) - .unwrap(); + let diff = blueprint2.diff(&blueprint3); + println!("2 -> 3 (expect new NTP zone on new sled):\n{}", diff,); + let sleds = diff.sleds_added().collect::>(); + let (sled_id, sled_zones) = sleds[0]; + // We have defined elsewhere that the first generation contains no + // zones. So the first one with zones must be newer. See + // OMICRON_ZONES_CONFIG_INITIAL_GENERATION. + assert!(sled_zones.generation > Generation::new()); + assert_eq!(sled_id, new_sled_id); + assert_eq!(sled_zones.zones.len(), 1); + assert!(matches!( + sled_zones.zones[0].zone_type, + OmicronZoneType::InternalNtp { .. } + )); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); // Check that the next step is to add Crucible zones let blueprint4 = Planner::new_based_on( @@ -312,12 +301,27 @@ mod test { .plan() .expect("failed to plan"); - writeln!( - &mut out, - "3 -> 4 (expect Crucible zones):\n{}", - blueprint3.diff(&blueprint4) - ) - .unwrap(); + let diff = blueprint3.diff(&blueprint4); + println!("3 -> 4 (expect Crucible zones):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + let sleds = diff.sleds_changed().collect::>(); + assert_eq!(sleds.len(), 1); + let (sled_id, sled_changes) = &sleds[0]; + assert_eq!( + sled_changes.generation_after, + sled_changes.generation_before.next() + ); + assert_eq!(*sled_id, new_sled_id); + assert_eq!(sled_changes.zones_removed().count(), 0); + assert_eq!(sled_changes.zones_changed().count(), 0); + let zones = sled_changes.zones_added().collect::>(); + assert_eq!(zones.len(), 3); + for zone in &zones { + let OmicronZoneType::Crucible { .. } = zone.zone_type else { + panic!("unexpectedly added a non-Crucible zone"); + }; + } // Check that there are no more steps let blueprint5 = Planner::new_based_on( @@ -329,14 +333,11 @@ mod test { .plan() .expect("failed to plan"); - writeln!( - &mut out, - "4 -> 5 (expect no changes):\n{}", - blueprint4.diff(&blueprint5) - ) - .unwrap(); - - expectorate::assert_contents("tests/output/planner_basic.out", &out); + let diff = blueprint4.diff(&blueprint5); + println!("4 -> 5 (expect no changes):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); logctx.cleanup_successful(); } diff --git a/nexus/deployment/tests/output/planner_basic.out b/nexus/deployment/tests/output/planner_basic.out deleted file mode 100644 index d60570ba20..0000000000 --- a/nexus/deployment/tests/output/planner_basic.out +++ /dev/null @@ -1,108 +0,0 @@ -1 -> 2 (expected no changes): -diff blueprints c50d0bdf-bc2b-45f4-9624-c5f2d11a4099 d70a3187-4165-49df-83b6-40cec55e5526 ---- c50d0bdf-bc2b-45f4-9624-c5f2d11a4099 -+++ d70a3187-4165-49df-83b6-40cec55e5526 - sled 0d168386-2551-44e8-98dd-ae7a7570f8a0 - zone config generation 2 - zone 10b0a742-f1e4-4238-a7a4-5cae054ec21c type crucible (unchanged) - zone 68bfd3a6-fe97-493e-8846-351596cca8ab type crucible (unchanged) - zone b20cd67c-4bde-4c53-8e8b-e990c1b6425d type internal_ntp (unchanged) - zone f59fddd0-b7f5-4dcc-8c60-a448cbf95116 type crucible (unchanged) - sled 72443b6c-b8bb-4ffa-ab3a-aeaa428ed79b - zone config generation 2 - zone 0429c3bb-49e0-4414-be00-89a52eae155f type crucible (unchanged) - zone 0564f879-d27a-43c0-ace8-2834acfa8c79 type crucible (unchanged) - zone 3a629f2c-a0de-4919-a10b-e82f411326be type crucible (unchanged) - zone 9bf49a6a-0755-4953-811f-ce125f2683d5 type internal_ntp (unchanged) - sled a5f3db3a-61aa-4f90-ad3e-02833c253bf5 - zone config generation 2 - zone 0bd58841-203e-44fe-86fc-71338ce0173d type internal_ntp (unchanged) - zone 42258dcd-a14c-4111-8602-b8971b8cc843 type crucible (unchanged) - zone c628ebb7-19bd-4bcc-9515-85214cc089b4 type crucible (unchanged) - zone e91e46ca-9051-41c0-a744-a6b017e69316 type crucible (unchanged) - -2 -> 3 (expect new NTP zone on new sled): -diff blueprints d70a3187-4165-49df-83b6-40cec55e5526 6cfe0985-3caf-4ed3-a528-dd65980a70e7 ---- d70a3187-4165-49df-83b6-40cec55e5526 -+++ 6cfe0985-3caf-4ed3-a528-dd65980a70e7 - sled 0d168386-2551-44e8-98dd-ae7a7570f8a0 - zone config generation 2 - zone 10b0a742-f1e4-4238-a7a4-5cae054ec21c type crucible (unchanged) - zone 68bfd3a6-fe97-493e-8846-351596cca8ab type crucible (unchanged) - zone b20cd67c-4bde-4c53-8e8b-e990c1b6425d type internal_ntp (unchanged) - zone f59fddd0-b7f5-4dcc-8c60-a448cbf95116 type crucible (unchanged) - sled 72443b6c-b8bb-4ffa-ab3a-aeaa428ed79b - zone config generation 2 - zone 0429c3bb-49e0-4414-be00-89a52eae155f type crucible (unchanged) - zone 0564f879-d27a-43c0-ace8-2834acfa8c79 type crucible (unchanged) - zone 3a629f2c-a0de-4919-a10b-e82f411326be type crucible (unchanged) - zone 9bf49a6a-0755-4953-811f-ce125f2683d5 type internal_ntp (unchanged) - sled a5f3db3a-61aa-4f90-ad3e-02833c253bf5 - zone config generation 2 - zone 0bd58841-203e-44fe-86fc-71338ce0173d type internal_ntp (unchanged) - zone 42258dcd-a14c-4111-8602-b8971b8cc843 type crucible (unchanged) - zone c628ebb7-19bd-4bcc-9515-85214cc089b4 type crucible (unchanged) - zone e91e46ca-9051-41c0-a744-a6b017e69316 type crucible (unchanged) -+ sled 7097f5b3-5896-4fff-bd97-63a9a69563a9 (added) -+ zone config generation 1 -+ zone 00953b9b-1c7f-4de6-bf67-83b5fd084cd0 type internal_ntp (added) - -3 -> 4 (expect Crucible zones): -diff blueprints 6cfe0985-3caf-4ed3-a528-dd65980a70e7 cb191215-ed99-4253-8b9e-0b7765575d00 ---- 6cfe0985-3caf-4ed3-a528-dd65980a70e7 -+++ cb191215-ed99-4253-8b9e-0b7765575d00 - sled 0d168386-2551-44e8-98dd-ae7a7570f8a0 - zone config generation 2 - zone 10b0a742-f1e4-4238-a7a4-5cae054ec21c type crucible (unchanged) - zone 68bfd3a6-fe97-493e-8846-351596cca8ab type crucible (unchanged) - zone b20cd67c-4bde-4c53-8e8b-e990c1b6425d type internal_ntp (unchanged) - zone f59fddd0-b7f5-4dcc-8c60-a448cbf95116 type crucible (unchanged) - sled 7097f5b3-5896-4fff-bd97-63a9a69563a9 -- zone config generation 1 -+ zone config generation 2 - zone 00953b9b-1c7f-4de6-bf67-83b5fd084cd0 type internal_ntp (unchanged) -+ zone 8208d613-e5c7-4c4d-9eb3-d507dfc7a6c0 type crucible (added) -+ zone aeaf5849-a808-4e2b-8f5b-addad46d1107 type crucible (added) -+ zone f67db542-2909-4c0b-96a8-8599929a1e6a type crucible (added) - sled 72443b6c-b8bb-4ffa-ab3a-aeaa428ed79b - zone config generation 2 - zone 0429c3bb-49e0-4414-be00-89a52eae155f type crucible (unchanged) - zone 0564f879-d27a-43c0-ace8-2834acfa8c79 type crucible (unchanged) - zone 3a629f2c-a0de-4919-a10b-e82f411326be type crucible (unchanged) - zone 9bf49a6a-0755-4953-811f-ce125f2683d5 type internal_ntp (unchanged) - sled a5f3db3a-61aa-4f90-ad3e-02833c253bf5 - zone config generation 2 - zone 0bd58841-203e-44fe-86fc-71338ce0173d type internal_ntp (unchanged) - zone 42258dcd-a14c-4111-8602-b8971b8cc843 type crucible (unchanged) - zone c628ebb7-19bd-4bcc-9515-85214cc089b4 type crucible (unchanged) - zone e91e46ca-9051-41c0-a744-a6b017e69316 type crucible (unchanged) - -4 -> 5 (expect no changes): -diff blueprints cb191215-ed99-4253-8b9e-0b7765575d00 ee8b17cb-b270-488d-9fd9-f90bfec37b8f ---- cb191215-ed99-4253-8b9e-0b7765575d00 -+++ ee8b17cb-b270-488d-9fd9-f90bfec37b8f - sled 0d168386-2551-44e8-98dd-ae7a7570f8a0 - zone config generation 2 - zone 10b0a742-f1e4-4238-a7a4-5cae054ec21c type crucible (unchanged) - zone 68bfd3a6-fe97-493e-8846-351596cca8ab type crucible (unchanged) - zone b20cd67c-4bde-4c53-8e8b-e990c1b6425d type internal_ntp (unchanged) - zone f59fddd0-b7f5-4dcc-8c60-a448cbf95116 type crucible (unchanged) - sled 7097f5b3-5896-4fff-bd97-63a9a69563a9 - zone config generation 2 - zone 00953b9b-1c7f-4de6-bf67-83b5fd084cd0 type internal_ntp (unchanged) - zone 8208d613-e5c7-4c4d-9eb3-d507dfc7a6c0 type crucible (unchanged) - zone aeaf5849-a808-4e2b-8f5b-addad46d1107 type crucible (unchanged) - zone f67db542-2909-4c0b-96a8-8599929a1e6a type crucible (unchanged) - sled 72443b6c-b8bb-4ffa-ab3a-aeaa428ed79b - zone config generation 2 - zone 0429c3bb-49e0-4414-be00-89a52eae155f type crucible (unchanged) - zone 0564f879-d27a-43c0-ace8-2834acfa8c79 type crucible (unchanged) - zone 3a629f2c-a0de-4919-a10b-e82f411326be type crucible (unchanged) - zone 9bf49a6a-0755-4953-811f-ce125f2683d5 type internal_ntp (unchanged) - sled a5f3db3a-61aa-4f90-ad3e-02833c253bf5 - zone config generation 2 - zone 0bd58841-203e-44fe-86fc-71338ce0173d type internal_ntp (unchanged) - zone 42258dcd-a14c-4111-8602-b8971b8cc843 type crucible (unchanged) - zone c628ebb7-19bd-4bcc-9515-85214cc089b4 type crucible (unchanged) - zone e91e46ca-9051-41c0-a744-a6b017e69316 type crucible (unchanged) - diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 444685e080..05375997a3 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -11,7 +11,6 @@ use nexus_db_queries::context::OpContext; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; use nexus_deployment::planner::Planner; -use nexus_types::deployment::params; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::Policy; @@ -35,6 +34,7 @@ use std::collections::BTreeSet; use std::num::NonZeroU32; use std::str::FromStr; use uuid::Uuid; +use nexus_types::deployment::BlueprintTargetSet; /// "limit" used in SQL queries that paginate through all sleds, zpools, etc. // unsafe: `new_unchecked` is only unsound if the argument is 0. @@ -159,7 +159,7 @@ impl super::Nexus { pub async fn blueprint_target_set( &self, opctx: &OpContext, - params: params::BlueprintTargetSet, + params: BlueprintTargetSet, ) -> Result { opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; let new_target_id = params.target_id; diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 481ca4f85a..42f7726495 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -25,9 +25,8 @@ use dropshot::ResultsPage; use dropshot::TypedBody; use hyper::Body; use nexus_db_model::Ipv4NatEntryView; -use nexus_types::deployment::params as deployment_params; -use nexus_types::deployment::views as deployment_views; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintTargetSet; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; use nexus_types::internal_api::views::to_list; @@ -46,6 +45,7 @@ use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; use serde::Deserialize; +use serde::Serialize; use std::collections::BTreeMap; use std::sync::Arc; use uuid::Uuid; @@ -679,6 +679,35 @@ async fn blueprint_delete( // Managing the current target blueprint +/// Describes what blueprint, if any, the system is currently working toward +#[derive(Debug, Serialize, JsonSchema)] +pub struct BlueprintTarget { + /// id of the blueprint that the system is trying to make real + pub target_id: Uuid, + /// policy: should the system actively work towards this blueprint + /// + /// This should generally be left enabled. + pub enabled: bool, + /// when this blueprint was made the target + pub time_set: chrono::DateTime, +} + +impl TryFrom for BlueprintTarget { + type Error = Error; + + fn try_from( + value: nexus_types::deployment::BlueprintTarget, + ) -> Result { + Ok(BlueprintTarget { + target_id: value.target_id.ok_or_else(|| Error::conflict( + "no target blueprint has been configured", + ))?, + enabled: value.enabled, + time_set: value.time_set, + }) + } +} + /// Fetches the current target blueprint, if any #[endpoint { method = GET, @@ -686,16 +715,13 @@ async fn blueprint_delete( }] async fn blueprint_target_view( rqctx: RequestContext>, -) -> Result, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; let target = nexus.blueprint_target_view(&opctx).await?; - Ok(HttpResponseOk( - deployment_views::BlueprintTarget::try_from(target) - .map_err(|e| Error::conflict(e.to_string()))?, - )) + Ok(HttpResponseOk(BlueprintTarget::try_from(target)?)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } @@ -707,8 +733,8 @@ async fn blueprint_target_view( }] async fn blueprint_target_set( rqctx: RequestContext>, - target: TypedBody, -) -> Result, HttpError> { + target: TypedBody, +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; @@ -716,7 +742,7 @@ async fn blueprint_target_set( let target = target.into_inner(); let result = nexus.blueprint_target_set(&opctx, target).await?; Ok(HttpResponseOk( - deployment_views::BlueprintTarget::try_from(result) + BlueprintTarget::try_from(result) .map_err(|e| Error::conflict(e.to_string()))?, )) }; diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index d37badc45b..948ac331ba 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -18,6 +18,7 @@ pub use crate::inventory::OmicronZonesConfig; pub use crate::inventory::ZpoolName; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; +use omicron_common::api::external::Generation; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -62,44 +63,43 @@ pub struct SledResources { } /// Describes a complete set of software and configuration for the system -/// -/// Blueprints are a fundamental part of how the system modifies itself. Each -/// blueprint completely describes all of the software and configuration -/// that the control plane manages. See the nexus/deployment crate-level -/// documentation for details. -/// -/// Blueprints are different from policy. Policy describes the things that an -/// operator would generally want to control. The blueprint describes the -/// details of implementing that policy that an operator shouldn't have to deal -/// with. For example, the operator might write policy that says "I want -/// 5 external DNS zones". The system could then generate a blueprint that -/// _has_ 5 external DNS zones on 5 specific sleds. The blueprint includes all -/// the details needed to achieve that, including which image these zones should -/// run, which zpools their persistent data should be stored on, their public and -/// private IP addresses, their internal DNS names, etc. -/// -/// It must be possible for multiple Nexus instances to execute the same -/// blueprint concurrently and converge to the same thing. Thus, these _cannot_ -/// be how a blueprint works: -/// -/// - "add a Nexus zone" -- two Nexus instances operating concurrently would -/// add _two_ Nexus zones (which is wrong) -/// - "ensure that there is a Nexus zone on this sled with this id" -- the IP -/// addresses and images are left unspecified. Two Nexus instances could pick -/// different IPs or images for the zone. -/// -/// This is why blueprints must be so detailed. The key principle here is that -/// **all the work of ensuring that the system do the right thing happens in one -/// process (the update planner in one Nexus instance). Once a blueprint has -/// been committed, everyone is on the same page about how to execute it.** The -/// intent is that this makes both planning and executing a lot easier. In -/// particular, by the time we get to execution, all the hard choices have -/// already been made. -/// -/// Currently, blueprints are limited to describing only the set of Omicron -/// zones deployed on each host and some supporting configuration (e.g., DNS). -/// This is aimed at supporting add/remove sleds. The plan is to grow this to -/// include more of the system as we support more use cases. +// Blueprints are a fundamental part of how the system modifies itself. Each +// blueprint completely describes all of the software and configuration +// that the control plane manages. See the nexus/deployment crate-level +// documentation for details. +// +// Blueprints are different from policy. Policy describes the things that an +// operator would generally want to control. The blueprint describes the +// details of implementing that policy that an operator shouldn't have to deal +// with. For example, the operator might write policy that says "I want +// 5 external DNS zones". The system could then generate a blueprint that +// _has_ 5 external DNS zones on 5 specific sleds. The blueprint includes all +// the details needed to achieve that, including which image these zones should +// run, which zpools their persistent data should be stored on, their public and +// private IP addresses, their internal DNS names, etc. +// +// It must be possible for multiple Nexus instances to execute the same +// blueprint concurrently and converge to the same thing. Thus, these _cannot_ +// be how a blueprint works: +// +// - "add a Nexus zone" -- two Nexus instances operating concurrently would +// add _two_ Nexus zones (which is wrong) +// - "ensure that there is a Nexus zone on this sled with this id" -- the IP +// addresses and images are left unspecified. Two Nexus instances could pick +// different IPs or images for the zone. +// +// This is why blueprints must be so detailed. The key principle here is that +// **all the work of ensuring that the system do the right thing happens in one +// process (the update planner in one Nexus instance). Once a blueprint has +// been committed, everyone is on the same page about how to execute it.** The +// intent is that this makes both planning and executing a lot easier. In +// particular, by the time we get to execution, all the hard choices have +// already been made. +// +// Currently, blueprints are limited to describing only the set of Omicron +// zones deployed on each host and some supporting configuration (e.g., DNS). +// This is aimed at supporting add/remove sleds. The plan is to grow this to +// include more of the system as we support more use cases. #[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] pub struct Blueprint { /// unique identifier for this blueprint @@ -120,7 +120,7 @@ pub struct Blueprint { /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, /// identity of the component that generated the blueprint (for debugging) - /// This would generally be the uuid of a Nexus instance. + /// This would generally be the Uuid of a Nexus instance. pub creator: String, /// human-readable string describing why this blueprint was created /// (for debugging) @@ -128,6 +128,8 @@ pub struct Blueprint { } impl Blueprint { + /// Iterate over all the Omicron zones in the blueprint, along with + /// associated sled id pub fn all_omicron_zones( &self, ) -> impl Iterator { @@ -136,21 +138,254 @@ impl Blueprint { .flat_map(|(sled_id, z)| z.zones.iter().map(|z| (*sled_id, z))) } + /// Iterate over the ids of all sleds in the blueprint pub fn sleds(&self) -> impl Iterator + '_ { self.omicron_zones.keys().copied() } + /// Summarize the difference between two blueprints pub fn diff<'a>(&'a self, other: &'a Blueprint) -> BlueprintDiff<'a> { BlueprintDiff { b1: self, b2: other } } } +/// Describes which blueprint the system is currently trying to make real +// This is analogous to the db model type until we have that. +#[derive(Debug, Clone)] +pub struct BlueprintTarget { + pub target_id: Option, + pub enabled: bool, + pub time_set: chrono::DateTime, +} + +/// Specifies what blueprint, if any, the system should be working toward +#[derive(Deserialize, JsonSchema)] +pub struct BlueprintTargetSet { + pub target_id: Uuid, + pub enabled: bool, +} + +/// Summarizes the differences between two blueprints pub struct BlueprintDiff<'a> { b1: &'a Blueprint, b2: &'a Blueprint, } +/// Describes a sled that appeared on both sides of a diff (possibly changed) +pub struct DiffSledCommon<'a> { + /// id of the sled + pub sled_id: Uuid, + /// generation of the "zones" configuration on the left side + pub generation_before: Generation, + /// generation of the "zones" configuration on the right side + pub generation_after: Generation, + zones_added: Vec<&'a OmicronZoneConfig>, + zones_removed: Vec<&'a OmicronZoneConfig>, + zones_common: Vec>, +} + +impl<'a> DiffSledCommon<'a> { + /// Iterate over zones added between the blueprints + pub fn zones_added( + &self, + ) -> impl Iterator + '_ { + self.zones_added.iter().copied() + } + + /// Iterate over zones removed between the blueprints + pub fn zones_removed( + &self, + ) -> impl Iterator + '_ { + self.zones_removed.iter().copied() + } + + /// Iterate over zones that are common to both blueprints + pub fn zones_in_common( + &self, + ) -> impl Iterator> + '_ { + self.zones_common.iter().copied() + } + + /// Iterate over zones that changed between the blue prints + pub fn zones_changed( + &self, + ) -> impl Iterator> + '_ { + self.zones_in_common() + .filter(|z| z.changed_how != DiffZoneChangedHow::NoChanges) + } +} + +/// Describes a zone that was common to both sides of a diff +#[derive(Debug, Copy, Clone)] +pub struct DiffZoneCommon<'a> { + /// full zone configuration before + pub zone_before: &'a OmicronZoneConfig, + /// full zone configuration after + pub zone_after: &'a OmicronZoneConfig, + /// summary of what changed, if anything + pub changed_how: DiffZoneChangedHow, +} + +/// Describes how a zone changed across two blueprints, if at all +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum DiffZoneChangedHow { + /// the zone did not change between these two blueprints + NoChanges, + /// the zone details are the same, but it was brought into service + AddedToService, + /// the zone details are the same, but it was removed from service + RemovedFromService, + /// the zone's details (i.e., configuration) changed + DetailsChanged, +} + impl<'a> BlueprintDiff<'a> { + /// Iterate over sleds only present in the second blueprint of a diff + pub fn sleds_added( + &self, + ) -> impl Iterator + '_ { + let sled_ids = self + .b2 + .sleds() + .collect::>() + .difference(&self.b1.sleds().collect::>()) + .copied() + .collect::>(); + + sled_ids.into_iter().map(|sled_id| { + (sled_id, self.b2.omicron_zones.get(&sled_id).unwrap()) + }) + } + + /// Iterate over sleds only present in the first blueprint of a diff + pub fn sleds_removed( + &self, + ) -> impl Iterator + '_ { + let sled_ids = self + .b1 + .sleds() + .collect::>() + .difference(&self.b2.sleds().collect::>()) + .copied() + .collect::>(); + sled_ids.into_iter().map(|sled_id| { + (sled_id, self.b1.omicron_zones.get(&sled_id).unwrap()) + }) + } + + /// Iterate over sleds present in both blueprints in a diff + pub fn sleds_in_common( + &self, + ) -> impl Iterator)> + '_ { + let sled_ids = self + .b1 + .sleds() + .collect::>() + .intersection(&self.b2.sleds().collect::>()) + .copied() + .collect::>(); + sled_ids.into_iter().map(|sled_id| { + let b1sledzones = self.b1.omicron_zones.get(&sled_id).unwrap(); + let b2sledzones = self.b2.omicron_zones.get(&sled_id).unwrap(); + + // Assemble separate summaries of the zones, indexed by zone id. + #[derive(Debug)] + struct ZoneInfo<'a> { + zone: &'a OmicronZoneConfig, + in_service: bool, + } + + let b1zones: BTreeMap = b1sledzones + .zones + .iter() + .map(|zone| { + ( + zone.id, + ZoneInfo { + zone, + in_service: self + .b1 + .zones_in_service + .contains(&zone.id), + }, + ) + }) + .collect(); + let b2zones: BTreeMap = b2sledzones + .zones + .iter() + .map(|zone| { + ( + zone.id, + ZoneInfo { + zone, + in_service: self + .b2 + .zones_in_service + .contains(&zone.id), + }, + ) + }) + .collect(); + let mut zones_added = vec![]; + let mut zones_removed = vec![]; + let mut zones_changed = vec![]; + + // Now go through each zone and compare them. + for (zone_id, b1z_info) in &b1zones { + if let Some(b2z_info) = b2zones.get(zone_id) { + let changed_how = if b1z_info.zone != b2z_info.zone { + DiffZoneChangedHow::DetailsChanged + } else if b1z_info.in_service && !b2z_info.in_service { + DiffZoneChangedHow::RemovedFromService + } else if !b1z_info.in_service && b2z_info.in_service { + DiffZoneChangedHow::AddedToService + } else { + DiffZoneChangedHow::NoChanges + }; + zones_changed.push(DiffZoneCommon { + zone_before: b1z_info.zone, + zone_after: b2z_info.zone, + changed_how, + }); + } else { + zones_removed.push(b1z_info.zone); + } + } + + for (zone_id, b2z_info) in &b2zones { + if b1zones.contains_key(&zone_id) { + // We handled this zone above. + continue; + } + + zones_added.push(b2z_info.zone) + } + + ( + sled_id, + DiffSledCommon { + sled_id, + generation_before: b1sledzones.generation, + generation_after: b2sledzones.generation, + zones_added, + zones_removed, + zones_common: zones_changed, + }, + ) + }) + } + + pub fn sleds_changed( + &self, + ) -> impl Iterator)> + '_ { + self.sleds_in_common().filter(|(_, sled_changes)| { + sled_changes.zones_added().next().is_some() + || sled_changes.zones_removed().next().is_some() + || sled_changes.zones_changed().next().is_some() + }) + } + fn print_whole_sled( &self, f: &mut std::fmt::Formatter<'_>, @@ -180,108 +415,55 @@ impl<'a> BlueprintDiff<'a> { } } +/// Implements diff(1)-like output for diff'ing two blueprints impl<'a> std::fmt::Display for BlueprintDiff<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "diff blueprints {} {}", self.b1.id, self.b2.id)?; writeln!(f, "--- {}", self.b1.id)?; writeln!(f, "+++ {}", self.b2.id)?; - let b1 = &self.b1; - let b2 = &self.b2; - - let all_sleds: BTreeSet = b1 - .sleds() - .collect::>() - .union(&b2.sleds().collect::>()) - .copied() - .collect(); - - for sled_id in all_sleds { - // First, figure out if this sled is only in one blueprint or common - // to both. - let maybe_b1sled = b1.omicron_zones.get(&sled_id); - let maybe_b2sled = b1.omicron_zones.get(&sled_id); - let (b1sledzones, b2sledzones) = match (maybe_b1sled, maybe_b2sled) - { - (None, None) => unreachable!(), - (Some(zones), None) => { - // This sled was removed altogether. Print it and move on. - self.print_whole_sled(f, '-', "removed", zones, sled_id)?; - continue; - } - (None, Some(zones)) => { - // This sled was added. Print it and move on. - self.print_whole_sled(f, '+', "added", zones, sled_id)?; - continue; - } - (Some(b1zones), Some(b2zones)) => (b1zones, b2zones), - }; + for (sled_id, sled_zones) in self.sleds_removed() { + self.print_whole_sled(f, '-', "removed", sled_zones, sled_id)?; + } - // At this point, we're looking at a sled that's in both blueprints. - // Print a line about the sled regardless of whether it's changed. + for (sled_id, sled_changes) in self.sleds_in_common() { + // Print a line about the sled itself and zone config generation, + // regardless of whether anything has changed. writeln!(f, " sled {}", sled_id)?; - - // Print a line about the zone config regardless of whether it's - // changed. - if b1sledzones.generation != b2sledzones.generation { + if sled_changes.generation_before != sled_changes.generation_after { writeln!( f, "- zone config generation {}", - b1sledzones.generation + sled_changes.generation_before )?; writeln!( f, "+ zone config generation {}", - b2sledzones.generation + sled_changes.generation_after )?; } else { writeln!( f, " zone config generation {}", - b1sledzones.generation + sled_changes.generation_before )?; } - // Assemble separate summaries of the zones, indexed by zone id. - #[derive(Debug)] - struct ZoneInfo<'a> { - zone: &'a OmicronZoneConfig, - in_service: bool, + for zone in sled_changes.zones_removed() { + writeln!( + f, + "- zone {} type {} (removed)", + zone.id, + zone.zone_type.label(), + )?; } - let b1zones: BTreeMap = b1sledzones - .zones - .iter() - .map(|zone| { - ( - zone.id, - ZoneInfo { - zone, - in_service: b1.zones_in_service.contains(&zone.id), - }, - ) - }) - .collect(); - let b2zones: BTreeMap = b2sledzones - .zones - .iter() - .map(|zone| { - ( - zone.id, - ZoneInfo { - zone, - in_service: b2.zones_in_service.contains(&zone.id), - }, - ) - }) - .collect(); - - // Now go through each zone and compare them. - for (zone_id, b1z_info) in &b1zones { - let zone_type = b1z_info.zone.zone_type.label(); - if let Some(b2z_info) = b2zones.get(zone_id) { - let zone2_type = b2z_info.zone.zone_type.label(); - if b1z_info.zone != b2z_info.zone { + for zone_changes in sled_changes.zones_in_common() { + let zone_id = zone_changes.zone_before.id; + let zone_type = zone_changes.zone_before.zone_type.label(); + let zone2_type = zone_changes.zone_after.zone_type.label(); + match zone_changes.changed_how { + DiffZoneChangedHow::DetailsChanged => { writeln!( f, "- zone {} type {} (changed)", @@ -292,7 +474,8 @@ impl<'a> std::fmt::Display for BlueprintDiff<'a> { "+ zone {} type {} (changed)", zone_id, zone2_type, )?; - } else if b1z_info.in_service && !b2z_info.in_service { + } + DiffZoneChangedHow::RemovedFromService => { writeln!( f, "- zone {} type {} (in service)", @@ -303,7 +486,8 @@ impl<'a> std::fmt::Display for BlueprintDiff<'a> { "+ zone {} type {} (removed from service)", zone_id, zone2_type, )?; - } else if !b1z_info.in_service && b2z_info.in_service { + } + DiffZoneChangedHow::AddedToService => { writeln!( f, "- zone {} type {} (not in service)", @@ -314,97 +498,31 @@ impl<'a> std::fmt::Display for BlueprintDiff<'a> { "+ zone {} type {} (added to service)", zone_id, zone2_type, )?; - } else { + } + DiffZoneChangedHow::NoChanges => { writeln!( f, " zone {} type {} (unchanged)", zone_id, zone_type, )?; } - } else { - writeln!( - f, - "- zone {} type {} (removed)", - zone_id, zone_type, - )?; } } - for (zone_id, b2z_info) in &b2zones { - if b1zones.contains_key(&zone_id) { - // We handled this zone above. - continue; - } - + for zone in sled_changes.zones_added() { writeln!( f, "+ zone {} type {} (added)", - zone_id, - b2z_info.zone.zone_type.label(), + zone.id, + zone.zone_type.label(), )?; } } - Ok(()) - } -} - -/// Describes which blueprint the system is currently trying to make real -// This is analogous to the db model type until we have that. -#[derive(Debug, Clone)] -pub struct BlueprintTarget { - pub target_id: Option, - pub enabled: bool, - pub time_set: chrono::DateTime, -} - -pub mod views { - use schemars::JsonSchema; - use serde::Serialize; - use thiserror::Error; - use uuid::Uuid; - - /// Describes what blueprint, if any, the system is currently working toward - #[derive(Debug, Serialize, JsonSchema)] - pub struct BlueprintTarget { - /// id of the blueprint that the system is trying to make real - pub target_id: Uuid, - /// policy: should the system actively work towards this blueprint - /// - /// This should generally be left enabled. - pub enabled: bool, - /// when this blueprint was made the target - pub time_set: chrono::DateTime, - } - - #[derive(Debug, Error)] - #[error("no target blueprint has been configured")] - pub struct NoTargetBlueprint; - - impl TryFrom for BlueprintTarget { - type Error = NoTargetBlueprint; - - fn try_from( - value: super::BlueprintTarget, - ) -> Result { - Ok(BlueprintTarget { - target_id: value.target_id.ok_or(NoTargetBlueprint)?, - enabled: value.enabled, - time_set: value.time_set, - }) + for (sled_id, sled_zones) in self.sleds_added() { + self.print_whole_sled(f, '+', "added", sled_zones, sled_id)?; } - } -} -pub mod params { - use schemars::JsonSchema; - use serde::Deserialize; - use uuid::Uuid; - - /// Specifies what blueprint, if any, the system should be working toward - #[derive(Deserialize, JsonSchema)] - pub struct BlueprintTargetSet { - pub target_id: Uuid, - pub enabled: bool, + Ok(()) } } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index ba9c835e4e..adda6ce175 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2069,20 +2069,24 @@ ] }, "Blueprint": { - "description": "Describes all automatically-managed software and configuration in the rack", + "description": "Describes a complete set of software and configuration for the system", "type": "object", "properties": { "comment": { + "description": "human-readable string describing why this blueprint was created (for debugging)", "type": "string" }, "creator": { + "description": "identity of the component that generated the blueprint (for debugging) This would generally be the Uuid of a Nexus instance.", "type": "string" }, "id": { + "description": "unique identifier for this blueprint", "type": "string", "format": "uuid" }, "omicron_zones": { + "description": "mapping: sled id -> zones deployed on each sled A sled is considered part of the control plane cluster iff it has an entry in this map.", "type": "object", "additionalProperties": { "$ref": "#/components/schemas/OmicronZonesConfig" @@ -2090,22 +2094,17 @@ }, "parent_blueprint_id": { "nullable": true, + "description": "which blueprint this blueprint is based on", "type": "string", "format": "uuid" }, - "sleds_in_cluster": { - "type": "array", - "items": { - "type": "string", - "format": "uuid" - }, - "uniqueItems": true - }, "time_created": { + "description": "when this blueprint was generated (for debugging)", "type": "string", "format": "date-time" }, "zones_in_service": { + "description": "Omicron zones considered in-service (which generally means that they should appear in DNS)", "type": "array", "items": { "type": "string", @@ -2119,7 +2118,6 @@ "creator", "id", "omicron_zones", - "sleds_in_cluster", "time_created", "zones_in_service" ]