From b1978e2529f8d2ce6966e84ec8623b4b297f2877 Mon Sep 17 00:00:00 2001
From: John Gallagher
Date: Fri, 20 Dec 2024 16:47:21 -0500
Subject: [PATCH] [Reconfigurator] Blippy (#7276)

This PR introduces Blippy for linting blueprints (see #6987). It
initially only reports `FATAL` errors associated with specific sleds,
but hopefully provides enough structure to see how that can expand to
include other severities and other components. (At a minimum, there
will be some blueprint- or policy-level component for things like
"there aren't enough Nexus zones" that aren't associated with any
particular sled.)

As of this PR, the only user of Blippy is the builder test's
`verify_blueprint`, from which I imported most of the checks that it
currently performs. I made a few of these checks slightly stricter, and
as a result I had to patch up a handful of tests that were doing weird
things (e.g., manually expunging a zone without expunging its datasets)
and also found one legitimate planner bug (I'll note it in a separate
comment below).
---
 Cargo.lock                                    |   14 +
 Cargo.toml                                    |    3 +
 common/src/api/external/mod.rs                |    2 +
 common/src/api/internal/shared.rs             |   24 +-
 nexus-sled-agent-shared/src/inventory.rs      |   11 +-
 nexus/reconfigurator/blippy/Cargo.toml        |   19 +
 nexus/reconfigurator/blippy/src/blippy.rs     |  428 +++++
 nexus/reconfigurator/blippy/src/checks.rs     | 1564 +++++++++++++++++
 nexus/reconfigurator/blippy/src/lib.rs        |   24 +
 nexus/reconfigurator/blippy/src/report.rs     |   85 +
 nexus/reconfigurator/execution/src/lib.rs     |    4 +-
 nexus/reconfigurator/planning/Cargo.toml      |    1 +
 .../planning/src/blueprint_builder/builder.rs |  217 +--
 .../src/blueprint_builder/internal_dns.rs     |   19 +
 nexus/reconfigurator/planning/src/planner.rs  |   15 +
 ...dataset_settings_modified_in_place_1_2.txt |    4 +-
 .../planner_decommissions_sleds_1_2.txt       |    4 +-
 ...lanner_expunge_clickhouse_clusters_3_4.txt |    4 +-
 ...lanner_expunge_clickhouse_clusters_5_6.txt |    4 +-
 .../output/planner_nonprovisionable_1_2.txt   |   12 +-
 .../output/planner_nonprovisionable_2_2a.txt  |   12 +-
 nexus/types/src/deployment.rs                 |   32 +-
 .../types/src/deployment/network_resources.rs |   48 +-
 nexus/types/src/deployment/zone_type.rs       |  132 +-
 24 files changed, 2447 insertions(+), 235 deletions(-)
 create mode 100644 nexus/reconfigurator/blippy/Cargo.toml
 create mode 100644 nexus/reconfigurator/blippy/src/blippy.rs
 create mode 100644 nexus/reconfigurator/blippy/src/checks.rs
 create mode 100644 nexus/reconfigurator/blippy/src/lib.rs
 create mode 100644 nexus/reconfigurator/blippy/src/report.rs

diff --git a/Cargo.lock b/Cargo.lock
index 05a7082aaa..5048914c74 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6035,6 +6035,19 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "nexus-reconfigurator-blippy"
+version = "0.1.0"
+dependencies = [
+ "nexus-reconfigurator-planning",
+ "nexus-sled-agent-shared",
+ "nexus-types",
+ "omicron-common",
+ "omicron-test-utils",
+ "omicron-uuid-kinds",
+ "omicron-workspace-hack",
+]
+
 [[package]]
 name = "nexus-reconfigurator-execution"
 version = "0.1.0"
@@ -6101,6 +6114,7 @@ dependencies = [
  "maplit",
  "nexus-config",
  "nexus-inventory",
+ "nexus-reconfigurator-blippy",
  "nexus-sled-agent-shared",
  "nexus-types",
  "omicron-common",
diff --git a/Cargo.toml b/Cargo.toml
index 3d8efe3acb..3d29f61cf9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -79,6 +79,7 @@ members = [
     "nexus/macros-common",
     "nexus/metrics-producer-gc",
     "nexus/networking",
+    "nexus/reconfigurator/blippy",
     "nexus/reconfigurator/execution",
     "nexus/reconfigurator/planning",
     "nexus/reconfigurator/preparation",
@@ -213,6 +214,7
@@ default-members = [ "nexus/macros-common", "nexus/metrics-producer-gc", "nexus/networking", + "nexus/reconfigurator/blippy", "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", @@ -468,6 +470,7 @@ nexus-internal-api = { path = "nexus/internal-api" } nexus-macros-common = { path = "nexus/macros-common" } nexus-metrics-producer-gc = { path = "nexus/metrics-producer-gc" } nexus-networking = { path = "nexus/networking" } +nexus-reconfigurator-blippy = { path = "nexus/reconfigurator/blippy" } nexus-reconfigurator-execution = { path = "nexus/reconfigurator/execution" } nexus-reconfigurator-planning = { path = "nexus/reconfigurator/planning" } nexus-reconfigurator-preparation = { path = "nexus/reconfigurator/preparation" } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 38a9de0564..ab46f9f7f6 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1930,6 +1930,8 @@ impl JsonSchema for L4PortRange { DeserializeFromStr, PartialEq, Eq, + PartialOrd, + Ord, SerializeDisplay, Hash, )] diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 94440df2d5..a3f28a258a 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -48,7 +48,16 @@ pub enum NetworkInterfaceKind { /// Information required to construct a virtual network interface #[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, + Clone, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, )] pub struct NetworkInterface { pub id: Uuid, @@ -68,7 +77,18 @@ pub struct NetworkInterface { /// outbound network connections from guests or services. // Note that `Deserialize` is manually implemented; if you make any changes to // the fields of this structure, you must make them to that implementation too. -#[derive(Debug, Clone, Copy, Serialize, JsonSchema, PartialEq, Eq, Hash)] +#[derive( + Debug, + Clone, + Copy, + Serialize, + JsonSchema, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, +)] pub struct SourceNatConfig { /// The external address provided to the instance or service. 
pub ip: IpAddr, diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 5fb2d55203..3b9daf583e 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -173,7 +173,16 @@ impl OmicronZoneConfig { /// Describes a persistent ZFS dataset associated with an Omicron zone #[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, + Clone, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, )] pub struct OmicronZoneDataset { pub pool_name: ZpoolName, diff --git a/nexus/reconfigurator/blippy/Cargo.toml b/nexus/reconfigurator/blippy/Cargo.toml new file mode 100644 index 0000000000..e7f7208871 --- /dev/null +++ b/nexus/reconfigurator/blippy/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "nexus-reconfigurator-blippy" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +nexus-sled-agent-shared.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +nexus-reconfigurator-planning.workspace = true +omicron-test-utils.workspace = true diff --git a/nexus/reconfigurator/blippy/src/blippy.rs b/nexus/reconfigurator/blippy/src/blippy.rs new file mode 100644 index 0000000000..9e9cc84b32 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/blippy.rs @@ -0,0 +1,428 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::checks; +use crate::report::BlippyReport; +use crate::report::BlippyReportSortKey; +use core::fmt; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::inventory::ZpoolName; +use omicron_common::address::DnsSubnet; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::api::external::MacAddr; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::BTreeSet; +use std::net::IpAddr; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Note { + pub severity: Severity, + pub kind: Kind, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Severity { + /// Indicator of a serious problem that means the blueprint is invalid. + Fatal, +} + +impl fmt::Display for Severity { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Severity::Fatal => write!(f, "FATAL"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Kind { + Sled { sled_id: SledUuid, kind: SledKind }, +} + +impl Kind { + pub fn display_component(&self) -> impl fmt::Display + '_ { + enum Component<'a> { + Sled(&'a SledUuid), + } + + impl fmt::Display for Component<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Component::Sled(id) => write!(f, "sled {id}"), + } + } + } + + match self { + Kind::Sled { sled_id, .. 
} => Component::Sled(sled_id), + } + } + + pub fn display_subkind(&self) -> impl fmt::Display + '_ { + enum Subkind<'a> { + Sled(&'a SledKind), + } + + impl fmt::Display for Subkind<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Subkind::Sled(kind) => write!(f, "{kind}"), + } + } + } + + match self { + Kind::Sled { kind, .. } => Subkind::Sled(kind), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum SledKind { + /// Two running zones have the same underlay IP address. + DuplicateUnderlayIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + }, + /// A sled has two zones that are not members of the same sled subnet. + SledWithMixedUnderlaySubnets { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + }, + /// Two sleds are using the same sled subnet. + ConflictingSledSubnets { + other_sled: SledUuid, + subnet: Ipv6Subnet, + }, + /// An internal DNS zone has an IP that is not one of the expected rack DNS + /// subnets. + InternalDnsZoneBadSubnet { + zone: BlueprintZoneConfig, + rack_dns_subnets: BTreeSet, + }, + /// Two running zones have the same external IP address. + DuplicateExternalIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + ip: IpAddr, + }, + /// Two running zones' NICs have the same IP address. + DuplicateNicIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + ip: IpAddr, + }, + /// Two running zones' NICs have the same MAC address. + DuplicateNicMac { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + mac: MacAddr, + }, + /// Two zones with the same durable dataset kind are on the same zpool. + ZoneDurableDatasetCollision { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + zpool: ZpoolName, + }, + /// Two zones with the same filesystem dataset kind are on the same zpool. + ZoneFilesystemDatasetCollision { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + zpool: ZpoolName, + }, + /// One zpool has two datasets of the same kind. + ZpoolWithDuplicateDatasetKinds { + dataset1: BlueprintDatasetConfig, + dataset2: BlueprintDatasetConfig, + zpool: ZpoolUuid, + }, + /// A zpool is missing its Debug dataset. + ZpoolMissingDebugDataset { zpool: ZpoolUuid }, + /// A zpool is missing its Zone Root dataset. + ZpoolMissingZoneRootDataset { zpool: ZpoolUuid }, + /// A zone's filesystem dataset is missing from `blueprint_datasets`. + ZoneMissingFilesystemDataset { zone: BlueprintZoneConfig }, + /// A zone's durable dataset is missing from `blueprint_datasets`. + ZoneMissingDurableDataset { zone: BlueprintZoneConfig }, + /// A zone's durable dataset and transient root dataset are on different + /// zpools. + ZoneWithDatasetsOnDifferentZpools { + zone: BlueprintZoneConfig, + durable_zpool: ZpoolName, + transient_zpool: ZpoolName, + }, + /// A sled is missing entries in `Blueprint::blueprint_datasets`. + /// + /// `why` indicates why we expected this sled to have an entry. + SledMissingDatasets { why: &'static str }, + /// A sled is missing entries in `Blueprint::blueprint_disks`. + /// + /// `why` indicates why we expected this sled to have an entry. + SledMissingDisks { why: &'static str }, + /// A dataset is present but not referenced by any in-service zone or disk. + OrphanedDataset { dataset: BlueprintDatasetConfig }, + /// A dataset claims to be on a zpool that does not exist. 
+ DatasetOnNonexistentZpool { dataset: BlueprintDatasetConfig }, +} + +impl fmt::Display for SledKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledKind::DuplicateUnderlayIp { zone1, zone2 } => { + write!( + f, + "duplicate underlay IP {} ({:?} {} and {:?} {})", + zone1.underlay_ip(), + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::SledWithMixedUnderlaySubnets { zone1, zone2 } => { + write!( + f, + "zones have underlay IPs on two different sled subnets: \ + {:?} {} ({}) and {:?} {} ({})", + zone1.zone_type.kind(), + zone1.id, + zone1.underlay_ip(), + zone2.zone_type.kind(), + zone2.id, + zone2.underlay_ip(), + ) + } + SledKind::ConflictingSledSubnets { other_sled, subnet } => { + write!( + f, + "duplicate sled subnet {} with sled {other_sled}", + subnet.net() + ) + } + SledKind::InternalDnsZoneBadSubnet { zone, rack_dns_subnets } => { + write!( + f, + "internal DNS zone {} underlay IP {} is not \ + one of the reserved rack DNS subnets ({:?})", + zone.id, + zone.underlay_ip(), + rack_dns_subnets + ) + } + SledKind::DuplicateExternalIp { zone1, zone2, ip } => { + write!( + f, + "duplicate external IP {ip} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::DuplicateNicIp { zone1, zone2, ip } => { + write!( + f, + "duplicate NIC IP {ip} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::DuplicateNicMac { zone1, zone2, mac } => { + write!( + f, + "duplicate NIC MAC {mac} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZoneDurableDatasetCollision { zone1, zone2, zpool } => { + write!( + f, + "zpool {zpool} has two zone datasets of the same kind \ + ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZoneFilesystemDatasetCollision { + zone1, + zone2, + zpool, + } => { + write!( + f, + "zpool {zpool} has two zone filesystems of the same kind \ + ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1, + dataset2, + zpool, + } => { + write!( + f, + "two datasets of the same kind on zpool {zpool} \ + ({:?} {} and {:?} {})", + dataset1.kind, dataset1.id, dataset2.kind, dataset2.id, + ) + } + SledKind::ZpoolMissingDebugDataset { zpool } => { + write!(f, "zpool {zpool} is missing its Debug dataset") + } + SledKind::ZpoolMissingZoneRootDataset { zpool } => { + write!(f, "zpool {zpool} is missing its Zone Root dataset") + } + SledKind::ZoneMissingFilesystemDataset { zone } => { + write!( + f, + "in-service zone's filesytem dataset is missing: {:?} {}", + zone.zone_type.kind(), + zone.id, + ) + } + SledKind::ZoneMissingDurableDataset { zone } => { + write!( + f, + "in-service zone's durable dataset is missing: {:?} {}", + zone.zone_type.kind(), + zone.id, + ) + } + SledKind::ZoneWithDatasetsOnDifferentZpools { + zone, + durable_zpool, + transient_zpool, + } => { + write!( + f, + "zone {:?} {} has its durable dataset on \ + zpool {durable_zpool} but its root dataset on \ + zpool {transient_zpool}", + zone.zone_type.kind(), + zone.id, + ) + } + SledKind::SledMissingDatasets { why } => { + write!(f, "missing entry in blueprint_datasets ({why})") + } + SledKind::SledMissingDisks { why } => { + write!(f, "missing entry in blueprint_disks ({why})") 
+ } + SledKind::OrphanedDataset { dataset } => { + let parent = match dataset.kind { + DatasetKind::Cockroach + | DatasetKind::Crucible + | DatasetKind::Clickhouse + | DatasetKind::ClickhouseKeeper + | DatasetKind::ClickhouseServer + | DatasetKind::ExternalDns + | DatasetKind::InternalDns + | DatasetKind::TransientZone { .. } => "zone", + DatasetKind::TransientZoneRoot + | DatasetKind::Debug + | DatasetKind::Update => "disk", + }; + write!( + f, + "in-service dataset ({:?} {}) with no associated {parent}", + dataset.kind, dataset.id + ) + } + SledKind::DatasetOnNonexistentZpool { dataset } => { + write!( + f, + "in-service dataset ({:?} {}) on non-existent zpool {}", + dataset.kind, dataset.id, dataset.pool + ) + } + } + } +} + +impl Note { + pub fn display(&self, sort_key: BlippyReportSortKey) -> NoteDisplay<'_> { + NoteDisplay { note: self, sort_key } + } +} + +#[derive(Debug)] +pub struct NoteDisplay<'a> { + note: &'a Note, + sort_key: BlippyReportSortKey, +} + +impl fmt::Display for NoteDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.sort_key { + BlippyReportSortKey::Kind => { + write!( + f, + "{}: {} note: {}", + self.note.kind.display_component(), + self.note.severity, + self.note.kind.display_subkind(), + ) + } + BlippyReportSortKey::Severity => { + write!( + f, + "{} note: {}: {}", + self.note.severity, + self.note.kind.display_component(), + self.note.kind.display_subkind(), + ) + } + } + } +} + +#[derive(Debug)] +pub struct Blippy<'a> { + blueprint: &'a Blueprint, + notes: Vec, +} + +impl<'a> Blippy<'a> { + pub fn new(blueprint: &'a Blueprint) -> Self { + let mut slf = Self { blueprint, notes: Vec::new() }; + checks::perform_all_blueprint_only_checks(&mut slf); + slf + } + + pub fn blueprint(&self) -> &'a Blueprint { + self.blueprint + } + + pub(crate) fn push_sled_note( + &mut self, + sled_id: SledUuid, + severity: Severity, + kind: SledKind, + ) { + self.notes.push(Note { severity, kind: Kind::Sled { sled_id, kind } }); + } + + pub fn into_report( + self, + sort_key: BlippyReportSortKey, + ) -> BlippyReport<'a> { + BlippyReport::new(self.blueprint, self.notes, sort_key) + } +} diff --git a/nexus/reconfigurator/blippy/src/checks.rs b/nexus/reconfigurator/blippy/src/checks.rs new file mode 100644 index 0000000000..f5673cb77c --- /dev/null +++ b/nexus/reconfigurator/blippy/src/checks.rs @@ -0,0 +1,1564 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::blippy::Blippy; +use crate::blippy::Severity; +use crate::blippy::SledKind; +use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintDatasetFilter; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::OmicronZoneExternalIp; +use omicron_common::address::DnsSubnet; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::net::Ipv6Addr; + +pub(crate) fn perform_all_blueprint_only_checks(blippy: &mut Blippy<'_>) { + check_underlay_ips(blippy); + check_external_networking(blippy); + check_dataset_zpool_uniqueness(blippy); + check_datasets(blippy); +} + +fn check_underlay_ips(blippy: &mut Blippy<'_>) { + let mut underlay_ips: BTreeMap = + BTreeMap::new(); + let mut inferred_sled_subnets_by_sled: BTreeMap< + SledUuid, + (Ipv6Subnet, &BlueprintZoneConfig), + > = BTreeMap::new(); + let mut inferred_sled_subnets_by_subnet: BTreeMap< + Ipv6Subnet, + SledUuid, + > = BTreeMap::new(); + let mut rack_dns_subnets: BTreeSet = BTreeSet::new(); + + for (sled_id, zone) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + let ip = zone.underlay_ip(); + + // There should be no duplicate underlay IPs. + if let Some(previous) = underlay_ips.insert(ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateUnderlayIp { + zone1: previous.clone(), + zone2: zone.clone(), + }, + ); + } + + if zone.zone_type.is_internal_dns() { + // Internal DNS zones should have IPs coming from the reserved rack + // DNS subnets. + let subnet = DnsSubnet::from_addr(ip); + if rack_dns_subnets.is_empty() { + // The blueprint doesn't store the rack subnet explicitly, so we + // infer it based on the first internal DNS zone we see. + rack_dns_subnets.extend(subnet.rack_subnet().get_dns_subnets()); + } + if !rack_dns_subnets.contains(&subnet) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::InternalDnsZoneBadSubnet { + zone: zone.clone(), + rack_dns_subnets: rack_dns_subnets.clone(), + }, + ); + } + } else { + let subnet = Ipv6Subnet::new(ip); + + // Any given subnet should be used by at most one sled. + match inferred_sled_subnets_by_subnet.entry(subnet) { + Entry::Vacant(slot) => { + slot.insert(sled_id); + } + Entry::Occupied(prev) => { + if *prev.get() != sled_id { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ConflictingSledSubnets { + other_sled: *prev.get(), + subnet, + }, + ); + } + } + } + + // Any given sled should have IPs within at most one subnet. + // + // The blueprint doesn't store sled subnets explicitly, so we can't + // check that each sled is using the subnet it's supposed to. The + // best we can do is check that the sleds are internally consistent. 
+ match inferred_sled_subnets_by_sled.entry(sled_id) { + Entry::Vacant(slot) => { + slot.insert((subnet, zone)); + } + Entry::Occupied(prev) => { + if prev.get().0 != subnet { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledWithMixedUnderlaySubnets { + zone1: prev.get().1.clone(), + zone2: zone.clone(), + }, + ); + } + } + } + } + } +} + +fn check_external_networking(blippy: &mut Blippy<'_>) { + let mut used_external_ips = BTreeMap::new(); + let mut used_external_floating_ips = BTreeMap::new(); + let mut used_external_snat_ips = BTreeMap::new(); + + let mut used_nic_ips = BTreeMap::new(); + let mut used_nic_macs = BTreeMap::new(); + + for (sled_id, zone, external_ip, nic) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .filter_map(|(sled_id, zone)| { + zone.zone_type + .external_networking() + .map(|(external_ip, nic)| (sled_id, zone, external_ip, nic)) + }) + { + // There should be no duplicate external IPs. + if let Some(prev_zone) = used_external_ips.insert(external_ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateExternalIp { + zone1: prev_zone.clone(), + zone2: zone.clone(), + ip: external_ip.ip(), + }, + ); + } + + // See the loop below; we build up separate maps to check for + // Floating/SNAT overlap that wouldn't be caught by the exact + // `used_external_ips` map above. + match external_ip { + OmicronZoneExternalIp::Floating(floating) => { + used_external_floating_ips.insert(floating.ip, zone); + } + OmicronZoneExternalIp::Snat(snat) => { + used_external_snat_ips + .insert(snat.snat_cfg.ip, (sled_id, zone)); + } + } + + // There should be no duplicate NIC IPs or MACs. + if let Some(prev_zone) = used_nic_ips.insert(nic.ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateNicIp { + zone1: prev_zone.clone(), + zone2: zone.clone(), + ip: nic.ip, + }, + ); + } + if let Some(prev_zone) = used_nic_macs.insert(nic.mac, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateNicMac { + zone1: prev_zone.clone(), + zone2: zone.clone(), + mac: nic.mac, + }, + ); + } + } + + // The loop above noted any exact duplicates; we should also check for any + // SNAT / Floating overlaps. For each SNAT IP, ensure we don't have a + // floating IP at the same address. + for (ip, (sled_id, zone2)) in used_external_snat_ips { + if let Some(&zone1) = used_external_floating_ips.get(&ip) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateExternalIp { + zone1: zone1.clone(), + zone2: zone2.clone(), + ip, + }, + ); + } + } +} + +fn check_dataset_zpool_uniqueness(blippy: &mut Blippy<'_>) { + let mut durable_kinds_by_zpool: BTreeMap> = + BTreeMap::new(); + let mut transient_kinds_by_zpool: BTreeMap< + ZpoolUuid, + BTreeMap, + > = BTreeMap::new(); + + // On any given zpool, we should have at most one zone of any given + // kind. + for (sled_id, zone) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + // Check "one kind per zpool" for durable datasets... 
+ if let Some(dataset) = zone.zone_type.durable_dataset() { + let kind = zone.zone_type.kind(); + if let Some(previous) = durable_kinds_by_zpool + .entry(dataset.dataset.pool_name.id()) + .or_default() + .insert(kind, zone) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneDurableDatasetCollision { + zone1: previous.clone(), + zone2: zone.clone(), + zpool: dataset.dataset.pool_name.clone(), + }, + ); + } + } + + // ... and transient datasets. + if let Some(dataset) = zone.filesystem_dataset() { + let kind = zone.zone_type.kind(); + if let Some(previous) = transient_kinds_by_zpool + .entry(dataset.pool().id()) + .or_default() + .insert(kind, zone) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneFilesystemDatasetCollision { + zone1: previous.clone(), + zone2: zone.clone(), + zpool: dataset.into_parts().0, + }, + ); + } + } + + // If a zone has both durable and transient datasets, they should be on + // the same pool. + match (zone.zone_type.durable_zpool(), zone.filesystem_pool.as_ref()) { + (Some(durable), Some(transient)) if durable != transient => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: zone.clone(), + durable_zpool: durable.clone(), + transient_zpool: transient.clone(), + }, + ); + } + _ => (), + } + } +} + +type DatasetByKind<'a> = BTreeMap; +type DatasetsByZpool<'a> = BTreeMap>; + +#[derive(Debug)] +struct DatasetsBySled<'a> { + by_sled: BTreeMap>, + noted_sleds_missing_datasets: BTreeSet, +} + +impl<'a> DatasetsBySled<'a> { + fn new(blippy: &mut Blippy<'a>) -> Self { + let mut by_sled = BTreeMap::new(); + + for (&sled_id, config) in &blippy.blueprint().blueprint_datasets { + let by_zpool: &mut BTreeMap<_, _> = + by_sled.entry(sled_id).or_default(); + + for dataset in config.datasets.values() { + let by_kind: &mut BTreeMap<_, _> = + by_zpool.entry(dataset.pool.id()).or_default(); + + match by_kind.entry(dataset.kind.clone()) { + Entry::Vacant(slot) => { + slot.insert(dataset); + } + Entry::Occupied(prev) => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1: (*prev.get()).clone(), + dataset2: dataset.clone(), + zpool: dataset.pool.id(), + }, + ); + } + } + } + } + + Self { by_sled, noted_sleds_missing_datasets: BTreeSet::new() } + } + + // Get the datasets for each zpool on a given sled, or add a fatal note to + // `blippy` that the sled is missing an entry in `blueprint_datasets` for + // the specified reason `why`. + fn get_sled_or_note_missing( + &mut self, + blippy: &mut Blippy<'_>, + sled_id: SledUuid, + why: &'static str, + ) -> Option<&DatasetsByZpool<'a>> { + let maybe_datasets = self.by_sled.get(&sled_id); + if maybe_datasets.is_none() + && self.noted_sleds_missing_datasets.insert(sled_id) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledMissingDatasets { why }, + ); + } + maybe_datasets + } +} + +fn check_datasets(blippy: &mut Blippy<'_>) { + let mut datasets = DatasetsBySled::new(blippy); + + // As we loop through all the datasets we expect to see, mark them down. + // Afterwards, we'll check for any datasets present that we _didn't_ expect + // to see. + let mut expected_datasets = BTreeSet::new(); + + // All disks should have debug and zone root datasets. + // + // TODO-correctness We currently only include in-service disks in the + // blueprint; once we include expunged or decommissioned disks too, we + // should filter here to only in-service. 
+ for (&sled_id, disk_config) in &blippy.blueprint().blueprint_disks { + let Some(sled_datasets) = datasets.get_sled_or_note_missing( + blippy, + sled_id, + "sled has an entry in blueprint_disks", + ) else { + continue; + }; + + for disk in &disk_config.disks { + let sled_datasets = sled_datasets.get(&disk.pool_id); + + match sled_datasets + .and_then(|by_zpool| by_zpool.get(&DatasetKind::Debug)) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolMissingDebugDataset { + zpool: disk.pool_id, + }, + ); + } + } + + match sled_datasets.and_then(|by_zpool| { + by_zpool.get(&DatasetKind::TransientZoneRoot) + }) { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolMissingZoneRootDataset { + zpool: disk.pool_id, + }, + ); + } + } + } + } + + // There should be a dataset for every dataset referenced by a running zone + // (filesystem or durable). + for (sled_id, zone_config) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + let Some(sled_datasets) = datasets.get_sled_or_note_missing( + blippy, + sled_id, + "sled has running zones", + ) else { + continue; + }; + + match &zone_config.filesystem_dataset() { + Some(dataset) => { + match sled_datasets + .get(&dataset.pool().id()) + .and_then(|by_zpool| by_zpool.get(dataset.dataset())) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneMissingFilesystemDataset { + zone: zone_config.clone(), + }, + ); + } + } + } + None => { + // TODO-john Add a Severity::BackwardsCompatibility and note the + // missing filesystem pool + } + } + + if let Some(dataset) = zone_config.zone_type.durable_dataset() { + match sled_datasets + .get(&dataset.dataset.pool_name.id()) + .and_then(|by_zpool| by_zpool.get(&dataset.kind)) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneMissingDurableDataset { + zone: zone_config.clone(), + }, + ); + } + } + } + } + + // TODO-correctness We currently only include in-service disks in the + // blueprint; once we include expunged or decommissioned disks too, we + // should filter here to only in-service. + let in_service_sled_zpools = blippy + .blueprint() + .blueprint_disks + .iter() + .map(|(sled_id, disk_config)| { + ( + sled_id, + disk_config + .disks + .iter() + .map(|disk| disk.pool_id) + .collect::>(), + ) + }) + .collect::>(); + let mut noted_sleds_without_disks = BTreeSet::new(); + + // All datasets should be on zpools that have disk records, and all datasets + // should have been referenced by either a zone or a disk above. 
+ for (sled_id, dataset) in blippy + .blueprint() + .all_omicron_datasets(BlueprintDatasetFilter::InService) + { + if !expected_datasets.contains(&dataset.id) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::OrphanedDataset { dataset: dataset.clone() }, + ); + continue; + } + + let Some(sled_zpools) = in_service_sled_zpools.get(&sled_id) else { + if noted_sleds_without_disks.insert(sled_id) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledMissingDisks { + why: "sled has in-service datasets", + }, + ); + } + continue; + }; + + if !sled_zpools.contains(&dataset.pool.id()) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DatasetOnNonexistentZpool { + dataset: dataset.clone(), + }, + ); + continue; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::blippy::Kind; + use crate::blippy::Note; + use crate::BlippyReportSortKey; + use nexus_reconfigurator_planning::example::example; + use nexus_reconfigurator_planning::example::ExampleSystemBuilder; + use nexus_types::deployment::blueprint_zone_type; + use nexus_types::deployment::BlueprintZoneType; + use omicron_test_utils::dev::test_setup_log; + + // The tests below all take the example blueprint, mutate in some invalid + // way, and confirm that blippy reports the invalidity. This test confirms + // the unmutated blueprint has no blippy notes. + #[test] + fn test_example_blueprint_is_blippy_clean() { + static TEST_NAME: &str = "test_example_blueprint_is_blippy_clean"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, blueprint) = example(&logctx.log, TEST_NAME); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + if !report.notes().is_empty() { + eprintln!("{}", report.display()); + panic!("example blueprint should have no blippy notes"); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_underlay_ips() { + static TEST_NAME: &str = "test_duplicate_underlay_ips"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the underlay IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = nexus0.underlay_ip(); + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + .. + }) => { + internal_address.set_ip(dup_ip); + } + _ => unreachable!("this is a Nexus zone"), + }; + + // This illegal modification should result in at least three notes: a + // duplicate underlay IP, duplicate sled subnets, and sled1 having mixed + // underlay subnets (the details of which depend on the ordering of + // zones, so we'll sort that out here). 
+ let nexus0 = nexus0.clone(); + let nexus1 = nexus1.clone(); + let (mixed_underlay_zone1, mixed_underlay_zone2) = { + let mut sled1_zones = blueprint + .blueprint_zones + .get(&nexus1_sled_id) + .unwrap() + .zones + .iter(); + let sled1_zone1 = sled1_zones.next().expect("at least one zone"); + let sled1_zone2 = sled1_zones.next().expect("at least two zones"); + if sled1_zone1.id == nexus1.id { + (nexus1.clone(), sled1_zone2.clone()) + } else { + (sled1_zone1.clone(), nexus1.clone()) + } + }; + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateUnderlayIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::SledWithMixedUnderlaySubnets { + zone1: mixed_underlay_zone1, + zone2: mixed_underlay_zone2, + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::ConflictingSledSubnets { + other_sled: nexus0_sled_id, + subnet: Ipv6Subnet::new(dup_ip), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_bad_internal_dns_subnet() { + static TEST_NAME: &str = "test_bad_internal_dns_subnet"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Change the second internal DNS zone to be from a different rack + // subnet. + let mut internal_dns_iter = blueprint + .blueprint_zones + .iter_mut() + .flat_map(|(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_internal_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }); + let (dns0_sled_id, dns0) = + internal_dns_iter.next().expect("at least one internal DNS zone"); + let (dns1_sled_id, dns1) = + internal_dns_iter.next().expect("at least two internal DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dns0_ip = dns0.underlay_ip(); + let rack_subnet = DnsSubnet::from_addr(dns0_ip).rack_subnet(); + let different_rack_subnet = { + // Flip the high bit of the existing underlay IP to guarantee a + // different rack subnet + let hi_bit = 1_u128 << 127; + let lo_bits = !hi_bit; + let hi_bit_ip = Ipv6Addr::from(hi_bit); + let lo_bits_ip = Ipv6Addr::from(lo_bits); + // Build XOR out of the operations we have... + let flipped_ip = if hi_bit_ip & dns0_ip == hi_bit_ip { + dns0_ip & lo_bits_ip + } else { + dns0_ip | hi_bit_ip + }; + DnsSubnet::from_addr(flipped_ip).rack_subnet() + }; + let different_dns_subnet = different_rack_subnet.get_dns_subnet(0); + + match &mut dns1.zone_type { + BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + http_address, + dns_address, + .. 
+ }, + ) => { + http_address.set_ip(different_dns_subnet.dns_address()); + dns_address.set_ip(different_dns_subnet.dns_address()); + } + _ => unreachable!("this is an internal DNS zone"), + }; + + let expected_note = Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::InternalDnsZoneBadSubnet { + zone: dns1.clone(), + rack_dns_subnets: rack_subnet + .get_dns_subnets() + .into_iter() + .collect(), + }, + }, + }; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + assert!( + report.notes().contains(&expected_note), + "did not find expected note {expected_note:?}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_external_ip() { + static TEST_NAME: &str = "test_duplicate_external_ip"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = match nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .0 + { + OmicronZoneExternalIp::Floating(ip) => ip, + OmicronZoneExternalIp::Snat(_) => { + unreachable!("Nexus has a floating IP") + } + }; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. + }) => { + *external_ip = dup_ip; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateExternalIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + ip: dup_ip.ip, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_nic_ip() { + static TEST_NAME: &str = "test_duplicate_nic_ip"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .1 + .ip; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. 
+ }) => { + nic.ip = dup_ip; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateNicIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + ip: dup_ip, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_nic_mac() { + static TEST_NAME: &str = "test_duplicate_nic_mac"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_mac = nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .1 + .mac; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. + }) => { + nic.mac = dup_mac; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateNicMac { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + mac: dup_mac, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_durable_dataset_collision() { + static TEST_NAME: &str = "test_durable_dataset_collision"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .external_dns_count(2) + .unwrap() + .build(); + + // Copy the durable zpool from one external DNS to another. + let mut dns_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_external_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (dns0_sled_id, dns0) = + dns_iter.next().expect("at least one external DNS zone"); + let (dns1_sled_id, dns1) = + dns_iter.next().expect("at least two external DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dup_zpool = dns0 + .zone_type + .durable_zpool() + .expect("external DNS has a durable zpool") + .clone(); + match &mut dns1.zone_type { + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dataset, .. 
}, + ) => { + dataset.pool_name = dup_zpool.clone(); + } + _ => unreachable!("this is an external DNS zone"), + }; + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneDurableDatasetCollision { + zone1: dns0.clone(), + zone2: dns1.clone(), + zpool: dup_zpool.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: dns1.clone(), + durable_zpool: dup_zpool.clone(), + transient_zpool: dns1.filesystem_pool.clone().unwrap(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_transient_root_dataset_collision() { + static TEST_NAME: &str = "test_transient_root_dataset_collision"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .external_dns_count(2) + .unwrap() + .build(); + + // Copy the filesystem zpool from one external DNS to another. + let mut dns_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_external_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (dns0_sled_id, dns0) = + dns_iter.next().expect("at least one external DNS zone"); + let (dns1_sled_id, dns1) = + dns_iter.next().expect("at least two external DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dup_zpool = dns0 + .filesystem_pool + .as_ref() + .expect("external DNS has a filesystem zpool") + .clone(); + dns1.filesystem_pool = Some(dup_zpool.clone()); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneFilesystemDatasetCollision { + zone1: dns0.clone(), + zone2: dns1.clone(), + zpool: dup_zpool.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: dns1.clone(), + durable_zpool: dns1 + .zone_type + .durable_zpool() + .unwrap() + .clone(), + transient_zpool: dup_zpool.clone(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zpool_with_duplicate_dataset_kinds() { + static TEST_NAME: &str = "test_zpool_with_duplicate_dataset_kinds"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + let mut by_kind = BTreeMap::new(); + + // Loop over the datasets until we find a dataset kind that already + // exists on a different zpool, then copy it over. 
+ let mut found_sled_id = None; + let mut dataset1 = None; + let mut dataset2 = None; + let mut zpool = None; + 'outer: for (sled_id, datasets_config) in + blueprint.blueprint_datasets.iter_mut() + { + for dataset in datasets_config.datasets.values_mut() { + if let Some(prev) = + by_kind.insert(dataset.kind.clone(), dataset.clone()) + { + dataset.pool = prev.pool.clone(); + + found_sled_id = Some(*sled_id); + dataset1 = Some(prev); + dataset2 = Some(dataset.clone()); + zpool = Some(dataset.pool.clone()); + break 'outer; + } + } + } + let sled_id = found_sled_id.expect("found dataset to move"); + let dataset1 = dataset1.expect("found dataset to move"); + let dataset2 = dataset2.expect("found dataset to move"); + let zpool = zpool.expect("found dataset to move"); + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id, + kind: SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1, + dataset2, + zpool: zpool.id(), + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zpool_missing_default_datasets() { + static TEST_NAME: &str = "test_zpool_missing_default_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Drop the Debug dataset from one zpool and the ZoneRoot dataset from + // another; we should catch both errors. + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + + let mut debug_dataset = None; + let mut zoneroot_dataset = None; + for dataset in &mut datasets_config.datasets.values_mut() { + match &dataset.kind { + DatasetKind::Debug if debug_dataset.is_none() => { + debug_dataset = Some(dataset.clone()); + } + DatasetKind::TransientZoneRoot + if debug_dataset.is_some() + && zoneroot_dataset.is_none() => + { + if Some(&dataset.pool) + != debug_dataset.as_ref().map(|d| &d.pool) + { + zoneroot_dataset = Some(dataset.clone()); + break; + } + } + _ => (), + } + } + let debug_dataset = + debug_dataset.expect("found Debug dataset to prune"); + let zoneroot_dataset = + zoneroot_dataset.expect("found ZoneRoot dataset to prune"); + assert_ne!(debug_dataset.pool, zoneroot_dataset.pool); + + // Actually strip these from the blueprint. 
+ datasets_config.datasets.retain(|&dataset_id, _| { + dataset_id != debug_dataset.id && dataset_id != zoneroot_dataset.id + }); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZpoolMissingDebugDataset { + zpool: debug_dataset.pool.id(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZpoolMissingZoneRootDataset { + zpool: zoneroot_dataset.pool.id(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zone_missing_datasets() { + static TEST_NAME: &str = "test_zone_missing_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + let zones_config = blueprint + .blueprint_zones + .get(sled_id) + .expect("got zones for sled with datasets"); + + // Pick a zone with a durable dataset to remove, and a different zone + // with a filesystem_pool dataset to remove. + let mut durable_zone = None; + let mut root_zone = None; + for z in &zones_config.zones { + if durable_zone.is_none() { + if z.zone_type.durable_zpool().is_some() { + durable_zone = Some(z.clone()); + } + } else if root_zone.is_none() { + root_zone = Some(z); + break; + } + } + let durable_zone = + durable_zone.expect("found zone with durable dataset to prune"); + let root_zone = + root_zone.expect("found zone with root dataset to prune"); + assert_ne!(durable_zone.filesystem_pool, root_zone.filesystem_pool); + + // Actually strip these from the blueprint. + datasets_config.datasets.retain(|_, dataset| { + let matches_durable = (dataset.pool + == *durable_zone.zone_type.durable_zpool().unwrap()) + && (dataset.kind + == durable_zone.zone_type.durable_dataset().unwrap().kind); + let root_dataset = root_zone.filesystem_dataset().unwrap(); + let matches_root = (dataset.pool == *root_dataset.pool()) + && (dataset.kind == *root_dataset.dataset()); + !matches_durable && !matches_root + }); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZoneMissingFilesystemDataset { + zone: root_zone.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZoneMissingDurableDataset { + zone: durable_zone, + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_sled_missing_datasets() { + static TEST_NAME: &str = "test_sled_missing_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick one sled and remove its blueprint_datasets entry entirely. 
+ let removed_sled_id = *blueprint + .blueprint_datasets + .keys() + .next() + .expect("at least one sled"); + blueprint + .blueprint_datasets + .retain(|&sled_id, _| sled_id != removed_sled_id); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + let mut found_sled_missing_note = false; + for note in report.notes() { + if note.severity == Severity::Fatal { + match ¬e.kind { + Kind::Sled { + sled_id, + kind: SledKind::SledMissingDatasets { .. }, + } if *sled_id == removed_sled_id => { + found_sled_missing_note = true; + } + _ => (), + } + } + } + assert!( + found_sled_missing_note, + "did not find expected note for missing datasets entry for \ + sled {removed_sled_id}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_sled_missing_disks() { + static TEST_NAME: &str = "test_sled_missing_disks"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick one sled and remove its blueprint_disks entry entirely. + let removed_sled_id = *blueprint + .blueprint_disks + .keys() + .next() + .expect("at least one sled"); + blueprint + .blueprint_disks + .retain(|&sled_id, _| sled_id != removed_sled_id); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + let mut found_sled_missing_note = false; + for note in report.notes() { + if note.severity == Severity::Fatal { + match ¬e.kind { + Kind::Sled { + sled_id, + kind: SledKind::SledMissingDisks { .. }, + } if *sled_id == removed_sled_id => { + found_sled_missing_note = true; + } + _ => (), + } + } + } + assert!( + found_sled_missing_note, + "did not find expected note for missing disks entry for \ + sled {removed_sled_id}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_orphaned_datasets() { + static TEST_NAME: &str = "test_orphaned_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick two zones (one with a durable dataset and one with a filesystem + // root dataset), and remove both those zones, which should orphan their + // datasets. + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + let zones_config = blueprint + .blueprint_zones + .get_mut(sled_id) + .expect("got zones for sled with datasets"); + let mut durable_zone = None; + let mut root_zone = None; + for z in &zones_config.zones { + if durable_zone.is_none() { + if z.zone_type.durable_zpool().is_some() { + durable_zone = Some(z.clone()); + } + } else if root_zone.is_none() { + root_zone = Some(z.clone()); + break; + } + } + let durable_zone = + durable_zone.expect("found zone with durable dataset to prune"); + let root_zone = + root_zone.expect("found zone with root dataset to prune"); + zones_config + .zones + .retain(|z| z.id != durable_zone.id && z.id != root_zone.id); + + let durable_dataset = durable_zone.zone_type.durable_dataset().unwrap(); + let root_dataset = root_zone.filesystem_dataset().unwrap(); + + // Find the datasets we expect to have been orphaned. 
+ let expected_notes = datasets_config + .datasets + .values() + .filter_map(|dataset| { + if (dataset.pool == durable_dataset.dataset.pool_name + && dataset.kind == durable_dataset.kind) + || (dataset.pool == *root_dataset.pool() + && dataset.kind == *root_dataset.dataset()) + { + Some(Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::OrphanedDataset { + dataset: dataset.clone(), + }, + }, + }) + } else { + None + } + }) + .collect::>(); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_dataset_on_nonexistent_zpool() { + static TEST_NAME: &str = "test_dataset_on_nonexistent_zpool"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Remove one zpool from one sled, then check that all datasets on that + // zpool produce report notes. + let (sled_id, disks_config) = blueprint + .blueprint_disks + .iter_mut() + .next() + .expect("at least one sled"); + let removed_disk = disks_config.disks.remove(0); + eprintln!("removed disk {removed_disk:?}"); + + let expected_notes = blueprint + .blueprint_datasets + .get(sled_id) + .unwrap() + .datasets + .values() + .filter_map(|dataset| { + if dataset.pool.id() != removed_disk.pool_id { + return None; + } + + let note = match dataset.kind { + DatasetKind::Debug | DatasetKind::TransientZoneRoot => { + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::OrphanedDataset { + dataset: dataset.clone(), + }, + }, + } + } + _ => Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::DatasetOnNonexistentZpool { + dataset: dataset.clone(), + }, + }, + }, + }; + Some(note) + }) + .collect::>(); + assert!(!expected_notes.is_empty()); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } +} diff --git a/nexus/reconfigurator/blippy/src/lib.rs b/nexus/reconfigurator/blippy/src/lib.rs new file mode 100644 index 0000000000..283bbfc0d0 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/lib.rs @@ -0,0 +1,24 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Blippy: the blueprint checker +//! +//! [`Blippy`] performs a variety of checks on blueprints to ensure they are +//! internally-consistent (e.g., "every in-service zone that should have one or +//! more datasets do", or "any given external IP address is used by at most one +//! in-service zone"). It emits [`BlippyReport`]s in the form of a list of +//! [`BlippyNote`]s, each of which has an associated severity and parent +//! component (typically a sled). 
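+//!
+//! A minimal usage sketch (the `check_blueprint` helper below is
+//! hypothetical; in this PR the only real caller is the blueprint
+//! builder's `verify_blueprint` test helper):
+//!
+//! ```ignore
+//! use nexus_reconfigurator_blippy::{Blippy, BlippyReportSortKey};
+//! use nexus_types::deployment::Blueprint;
+//!
+//! fn check_blueprint(blueprint: &Blueprint) {
+//!     // All checks run inside `Blippy::new`; the report is just a sorted
+//!     // view over the notes those checks accumulated.
+//!     let report =
+//!         Blippy::new(blueprint).into_report(BlippyReportSortKey::Kind);
+//!     if !report.notes().is_empty() {
+//!         panic!("blueprint has blippy notes:\n{}", report.display());
+//!     }
+//! }
+//! ```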
+
+mod blippy;
+mod checks;
+mod report;
+
+pub use blippy::Blippy;
+pub use blippy::Kind as BlippyKind;
+pub use blippy::Note as BlippyNote;
+pub use blippy::Severity as BlippySeverity;
+pub use blippy::SledKind as BlippySledKind;
+pub use report::BlippyReport;
+pub use report::BlippyReportSortKey;
diff --git a/nexus/reconfigurator/blippy/src/report.rs b/nexus/reconfigurator/blippy/src/report.rs
new file mode 100644
index 0000000000..136d3b7538
--- /dev/null
+++ b/nexus/reconfigurator/blippy/src/report.rs
@@ -0,0 +1,85 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::blippy::Note;
+use core::fmt;
+use nexus_types::deployment::Blueprint;
+
+#[derive(Debug, Clone, Copy)]
+pub enum BlippyReportSortKey {
+ Kind,
+ Severity,
+}
+
+#[derive(Debug)]
+pub struct BlippyReport<'a> {
+ blueprint: &'a Blueprint,
+ notes: Vec<Note>,
+ sort_key: BlippyReportSortKey,
+}
+
+impl<'a> BlippyReport<'a> {
+ pub(crate) fn new(
+ blueprint: &'a Blueprint,
+ notes: Vec<Note>,
+ sort_key: BlippyReportSortKey,
+ ) -> Self {
+ let mut slf = Self { blueprint, notes, sort_key };
+ slf.sort_notes_by_key(sort_key);
+ slf
+ }
+
+ pub fn sort_notes_by_key(&mut self, key: BlippyReportSortKey) {
+ match key {
+ BlippyReportSortKey::Kind => {
+ self.notes.sort_unstable_by(|a, b| {
+ let a = (&a.kind, &a.severity);
+ let b = (&b.kind, &b.severity);
+ a.cmp(&b)
+ });
+ }
+ BlippyReportSortKey::Severity => {
+ self.notes.sort_unstable_by(|a, b| {
+ let a = (&a.severity, &a.kind);
+ let b = (&b.severity, &b.kind);
+ a.cmp(&b)
+ });
+ }
+ }
+ self.sort_key = key;
+ }
+
+ pub fn blueprint(&self) -> &'a Blueprint {
+ self.blueprint
+ }
+
+ pub fn notes(&self) -> &[Note] {
+ &self.notes
+ }
+
+ pub fn display(&self) -> BlippyReportDisplay<'_> {
+ BlippyReportDisplay { report: self }
+ }
+}
+
+#[derive(Debug)]
+pub struct BlippyReportDisplay<'a> {
+ report: &'a BlippyReport<'a>,
+}
+
+impl fmt::Display for BlippyReportDisplay<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let pluralize = if self.report.notes.len() == 1 { "" } else { "s" };
+ writeln!(
+ f,
+ "blippy report for blueprint {}: {} note{pluralize}",
+ self.report.blueprint.id,
+ self.report.notes.len(),
+ )?;
+ for note in self.report.notes() {
+ writeln!(f, " {}", note.display(self.report.sort_key))?;
+ }
+ Ok(())
+ }
+}
diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs
index 5ba1665483..543b9bd278 100644
--- a/nexus/reconfigurator/execution/src/lib.rs
+++ b/nexus/reconfigurator/execution/src/lib.rs
@@ -400,7 +400,9 @@ fn register_dataset_records_step<'a>(
&opctx,
datastore,
bp_id,
- blueprint.all_omicron_datasets(BlueprintDatasetFilter::All),
+ blueprint
+ .all_omicron_datasets(BlueprintDatasetFilter::All)
+ .map(|(_sled_id, dataset)| dataset),
)
.await?;

diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml
index efed173a71..6656693a90 100644
--- a/nexus/reconfigurator/planning/Cargo.toml
+++ b/nexus/reconfigurator/planning/Cargo.toml
@@ -19,6 +19,7 @@ ipnet.workspace = true
itertools.workspace = true
nexus-config.workspace = true
nexus-inventory.workspace = true
+nexus-reconfigurator-blippy.workspace = true
nexus-sled-agent-shared.workspace = true
nexus-types.workspace = true
omicron-common.workspace = true
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
index ad59fb3718..0aaadb624d 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
@@ -1834,11 +1834,13 @@ impl<'a> BlueprintBuilder<'a> {
let mut skip_zpools = BTreeSet::new();
for zone_config in self
.current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning)
+ .filter(|z| z.zone_type.kind() == zone_kind)
{
if let Some(zpool) = zone_config.zone_type.durable_zpool() {
- if zone_kind == zone_config.zone_type.kind() {
- skip_zpools.insert(zpool);
- }
+ skip_zpools.insert(zpool);
+ }
+ if let Some(zpool) = &zone_config.filesystem_pool {
+ skip_zpools.insert(zpool);
}
}

@@ -1901,211 +1903,34 @@ impl<'a> BlueprintBuilder<'a> {
#[cfg(test)]
pub mod test {
use super::*;
- use crate::blueprint_builder::external_networking::ExternalIpAllocator;
use crate::example::example;
use crate::example::ExampleSystemBuilder;
use crate::example::SimRngState;
use crate::system::SledBuilder;
use nexus_inventory::CollectionBuilder;
- use nexus_types::deployment::BlueprintDatasetConfig;
+ use nexus_reconfigurator_blippy::Blippy;
+ use nexus_reconfigurator_blippy::BlippyReportSortKey;
use nexus_types::deployment::BlueprintDatasetDisposition;
- use nexus_types::deployment::BlueprintDatasetFilter;
use nexus_types::deployment::BlueprintZoneFilter;
use nexus_types::deployment::OmicronZoneNetworkResources;
use nexus_types::external_api::views::SledPolicy;
use omicron_common::address::IpRange;
- use omicron_common::disk::DatasetKind;
use omicron_test_utils::dev::test_setup_log;
- use omicron_uuid_kinds::DatasetUuid;
use std::collections::BTreeSet;
use std::mem;

pub const DEFAULT_N_SLEDS: usize = 3;

- fn datasets_for_sled(
- blueprint: &Blueprint,
- sled_id: SledUuid,
- ) -> &BTreeMap<DatasetUuid, BlueprintDatasetConfig> {
- &blueprint
- .blueprint_datasets
- .get(&sled_id)
- .unwrap_or_else(|| {
- panic!("Cannot find datasets on missing sled: {sled_id}")
- })
- .datasets
- }
-
- fn find_dataset<'a>(
- datasets: &'a BTreeMap<DatasetUuid, BlueprintDatasetConfig>,
- zpool: &ZpoolName,
- kind: DatasetKind,
- ) -> &'a BlueprintDatasetConfig {
- datasets.values().find(|dataset| {
- &dataset.pool == zpool &&
- dataset.kind == kind
- }).unwrap_or_else(|| {
- let kinds = datasets.values().map(|d| (&d.id, &d.pool, &d.kind)).collect::<Vec<_>>();
- panic!("Cannot find dataset of type {kind}\nFound the following: {kinds:#?}")
- })
- }
-
/// Checks various conditions that should be true for all blueprints
#[track_caller]
pub fn verify_blueprint(blueprint: &Blueprint) {
- // There should be no duplicate underlay IPs.
- let mut underlay_ips: BTreeMap<Ipv6Addr, &BlueprintZoneConfig> =
- BTreeMap::new();
- for (_, zone) in
- blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
- {
- if let Some(previous) =
- underlay_ips.insert(zone.underlay_ip(), zone)
- {
- panic!(
- "found duplicate underlay IP {} in zones {} and {}\
- \n\n\
- blueprint: {}",
- zone.underlay_ip(),
- zone.id,
- previous.id,
- blueprint.display(),
- );
- }
- }
-
- // There should be no duplicate external IPs.
- //
- // Checking this is slightly complicated due to SNAT IPs, so we'll
- // delegate to an `ExternalIpAllocator`, which already contains the
- // logic for dup checking. (`mark_ip_used` fails if the IP is _already_
- // marked as used.)
- //
- // We create this with an empty set of service IP pool ranges; those are
- // used for allocation, which we don't do, and aren't needed for
- // duplicate checking.
- let mut ip_allocator = ExternalIpAllocator::new(&[]);
- for (external_ip, _nic) in blueprint
- .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
- .filter_map(|(_, zone)| zone.zone_type.external_networking())
- {
- ip_allocator
- .mark_ip_used(&external_ip)
- .expect("no duplicate external IPs in running zones");
- }
-
- // On any given zpool, we should have at most one zone of any given
- // kind.
- //
- // TODO: we may want a similar check for non-durable datasets?
- let mut kinds_by_zpool: BTreeMap<
- ZpoolUuid,
- BTreeMap<ZoneKind, OmicronZoneUuid>,
- > = BTreeMap::new();
- for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) {
- if let Some(dataset) = zone.zone_type.durable_dataset() {
- let kind = zone.zone_type.kind();
- if let Some(previous) = kinds_by_zpool
- .entry(dataset.dataset.pool_name.id())
- .or_default()
- .insert(kind, zone.id)
- {
- panic!(
- "zpool {} has two zones of kind {kind:?}: {} and {}\
- \n\n\
- blueprint: {}",
- dataset.dataset.pool_name,
- zone.id,
- previous,
- blueprint.display(),
- );
- }
- }
- }
-
- // All disks should have debug and zone root datasets.
- for (sled_id, disk_config) in &blueprint.blueprint_disks {
- for disk in &disk_config.disks {
- eprintln!(
- "checking datasets for sled {sled_id} disk {}",
- disk.id
- );
- let zpool = ZpoolName::new_external(disk.pool_id);
- let datasets = datasets_for_sled(&blueprint, *sled_id);
-
- let dataset =
- find_dataset(&datasets, &zpool, DatasetKind::Debug);
- assert_eq!(
- dataset.disposition,
- BlueprintDatasetDisposition::InService
- );
- let dataset = find_dataset(
- &datasets,
- &zpool,
- DatasetKind::TransientZoneRoot,
- );
- assert_eq!(
- dataset.disposition,
- BlueprintDatasetDisposition::InService
- );
- }
- }
-
- // All zones should have dataset records.
- for (sled_id, zone_config) in
- blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
- {
- match blueprint.sled_state.get(&sled_id) {
- // Decommissioned sleds don't keep dataset state around.
- //
- // Normally we wouldn't observe zones from decommissioned sleds
- // anyway, but that's the responsibility of the Planner, not the
- // BlueprintBuilder.
- None | Some(SledState::Decommissioned) => continue,
- Some(SledState::Active) => (),
- }
- let datasets = datasets_for_sled(&blueprint, sled_id);
-
- let (zpool, kind) =
- zone_config.filesystem_dataset().unwrap().into_parts();
- let dataset = find_dataset(&datasets, &zpool, kind);
- assert_eq!(
- dataset.disposition,
- BlueprintDatasetDisposition::InService
- );
-
- if let Some(durable_dataset) =
- zone_config.zone_type.durable_dataset()
- {
- let zpool = &durable_dataset.dataset.pool_name;
- let dataset =
- find_dataset(&datasets, &zpool, durable_dataset.kind);
- assert_eq!(
- dataset.disposition,
- BlueprintDatasetDisposition::InService
- );
- }
- }
-
- // All datasets should be on zpools that have disk records.
- for (sled_id, datasets) in &blueprint.blueprint_datasets {
- let sled_disk_zpools = blueprint
- .blueprint_disks
- .get(&sled_id)
- .expect("no disks for sled")
- .disks
- .iter()
- .map(|disk| disk.pool_id)
- .collect::<BTreeSet<_>>();
-
- for dataset in datasets.datasets.values().filter(|dataset| {
- dataset.disposition.matches(BlueprintDatasetFilter::InService)
- }) {
- assert!(
- sled_disk_zpools.contains(&dataset.pool.id()),
- "sled {sled_id} has dataset {dataset:?}, \
- which references a zpool without an associated disk",
- );
- }
+ let blippy_report =
+ Blippy::new(blueprint).into_report(BlippyReportSortKey::Kind);
+ if !blippy_report.notes().is_empty() {
+ eprintln!("{}", blueprint.display());
+ eprintln!("---");
+ eprintln!("{}", blippy_report.display());
+ panic!("expected blippy report for blueprint to have no notes");
+ }
}

@@ -2314,6 +2139,20 @@ pub mod test {
*blueprint1.sled_state.get_mut(&decommision_sled_id).unwrap() =
SledState::Decommissioned;

+ // We're going under the hood of the blueprint here; a sled can only get
+ // to the decommissioned state if all its disks/datasets/zones have been
+ // expunged, so do that too.
+ for zone in &mut blueprint1
+ .blueprint_zones
+ .get_mut(&decommision_sled_id)
+ .expect("has zones")
+ .zones
+ {
+ zone.disposition = BlueprintZoneDisposition::Expunged;
+ }
+ blueprint1.blueprint_datasets.remove(&decommision_sled_id);
+ blueprint1.blueprint_disks.remove(&decommision_sled_id);
+
// Change the input to note that the sled is expunged, but still active.
let mut builder = input.into_builder();
builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().policy =
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs
index 56fa39374d..4222481149 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs
@@ -102,6 +102,7 @@ pub mod test {
use crate::blueprint_builder::test::verify_blueprint;
use crate::example::ExampleSystemBuilder;
use nexus_types::deployment::BlueprintZoneFilter;
+ use omicron_common::disk::DatasetKind;
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
use omicron_test_utils::dev::test_setup_log;

@@ -128,6 +129,24 @@ pub mod test {
let npruned = blueprint1.blueprint_zones.len() - 1;
assert!(npruned > 0);

+ // Also prune out the zones' datasets, or we're left with an invalid
+ // blueprint.
+ for (_, dataset_config) in
+ blueprint1.blueprint_datasets.iter_mut().skip(1)
+ {
+ dataset_config.datasets.retain(|_id, dataset| {
+ // This is gross; once zone configs know explicit dataset IDs,
+ // we should retain by ID instead.
+ match &dataset.kind {
+ DatasetKind::InternalDns => false,
+ DatasetKind::TransientZone { name } => {
+ !name.starts_with("oxz_internal_dns")
+ }
+ _ => true,
+ }
+ });
+ }
+
verify_blueprint(&blueprint1);

// Create an allocator.
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index a18bb6e3d3..ce1a1ae960 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1253,6 +1253,21 @@ mod test { for (_sled_id, zones) in blueprint1.blueprint_zones.iter_mut().take(2) { zones.zones.retain(|z| !z.zone_type.is_internal_dns()); } + for (_, dataset_config) in + blueprint1.blueprint_datasets.iter_mut().take(2) + { + dataset_config.datasets.retain(|_id, dataset| { + // This is gross; once zone configs know explicit dataset IDs, + // we should retain by ID instead. + match &dataset.kind { + DatasetKind::InternalDns => false, + DatasetKind::TransientZone { name } => { + !name.starts_with("oxz_internal_dns") + } + _ => true, + } + }); + } let blueprint2 = Planner::new_based_on( logctx.log.clone(), diff --git a/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt index f2d8334027..45d7feb667 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt @@ -61,8 +61,8 @@ to: blueprint fe13be30-94c2-4fa6-aad5-ae3c5028f6bb oxp_f843fb62-0f04-4c7d-a56f-62531104dc77/crypt/zone/oxz_crucible_fc4f1769-9611-42d3-b8c1-f2be9b5359f6 35fa6ec8-6b58-4fcc-a5a2-36e66736e9c1 none none off oxp_96569b61-9e0c-4ee7-bd11-a5e0c541ca99/crypt/zone/oxz_crucible_fff71a84-09c2-4dab-bc18-8f4570f278bb 00abfe99-288d-4a63-abea-adfa62e74524 none none off oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_197067bc-9a21-444e-9794-6051d9f78a00 19736dbd-1d01-41e9-a800-ffc450464c2d none none off - oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_350fba7f-b754-429e-a21d-e91d139713f2 8be4aa2f-1612-4bdf-a0f6-7458b151308f none none off - oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_504963cb-3077-477c-b4e5-2d69bf9caa0c 7fd439f9-dcef-4cfb-b1a1-d298be9d2e3b none none off + oxp_5192ef62-5a12-4a0c-829d-a409da87909c/crypt/zone/oxz_crucible_pantry_350fba7f-b754-429e-a21d-e91d139713f2 8be4aa2f-1612-4bdf-a0f6-7458b151308f none none off + oxp_8778bcc5-dddf-4345-9fdf-5c46a36497b0/crypt/zone/oxz_crucible_pantry_504963cb-3077-477c-b4e5-2d69bf9caa0c 7fd439f9-dcef-4cfb-b1a1-d298be9d2e3b none none off oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_internal_dns_1e9422ca-a3d9-4435-bb17-39d5ad22b4ba 5651c4fb-d146-4270-8794-6ed7ceb6f130 none none off oxp_8778bcc5-dddf-4345-9fdf-5c46a36497b0/crypt/zone/oxz_internal_dns_4a0ec9f6-6ce6-4456-831e-5f8df7b57332 d2b9f103-8bf1-4603-873d-cec130430ba7 none none off oxp_5192ef62-5a12-4a0c-829d-a409da87909c/crypt/zone/oxz_internal_dns_efecb8a2-ce0b-416f-958b-de1fad1bef02 158e226c-e44e-427f-93af-ee96d2cfb9be none none off diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt index ef93527fa3..6414749fce 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt @@ -177,8 +177,8 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e oxp_9d833141-18a1-4f24-8a34-6076c026aa87/crypt/debug f631d6a1-9db5-4fc7-978b-9ace485dfe16 100 GiB none gzip-9 
oxp_a279461f-a7b9-413f-a79f-cb4dab4c3fce/crypt/debug 1b9c97d6-c90d-4109-b99c-9ab799b3c3b9 100 GiB none gzip-9 oxp_ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59/crypt/debug 9427caff-29ec-4cd1-981b-26d4a7900052 100 GiB none gzip-9 -+ oxp_1e2ec79e-9c11-4133-ac77-e0b994a507d5/crypt/zone/oxz_crucible_pantry_ff9ce09c-afbf-425b-bbfa-3d8fb254f98e 7d47e5d6-a1a5-451a-b4b4-3a9747f8154a none none off -+ oxp_1e2ec79e-9c11-4133-ac77-e0b994a507d5/crypt/zone/oxz_nexus_845869e9-ecb2-4ec3-b6b8-2a836e459243 a759d2f3-003c-4fb8-b06b-f985e213b273 none none off ++ oxp_440ae69d-5e2e-4539-91d0-e2930bdd7203/crypt/zone/oxz_crucible_pantry_ff9ce09c-afbf-425b-bbfa-3d8fb254f98e 7d47e5d6-a1a5-451a-b4b4-3a9747f8154a none none off ++ oxp_440ae69d-5e2e-4539-91d0-e2930bdd7203/crypt/zone/oxz_nexus_845869e9-ecb2-4ec3-b6b8-2a836e459243 a759d2f3-003c-4fb8-b06b-f985e213b273 none none off omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt index 803d4ea2a1..39b3398b2e 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt @@ -288,8 +288,8 @@ to: blueprint 74f2e7fd-687e-4c9e-b5d8-e474a5bb8e7c oxp_fe379ac6-1938-4cc2-93a9-43b1447229ae/crypt/debug 3a49dd24-8ead-4196-b453-8aa3273b77d1 100 GiB none gzip-9 + oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/clickhouse 410eca9c-8eee-4a98-aea2-a363697974f7 none none off + oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_clickhouse_fa97835a-aabc-4fe9-9e85-3e50f207129c 08f15d4b-91dc-445d-88f4-cb9fa585444b none none off -+ oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off -+ oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off ++ oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off ++ oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off omicron zones generation 3 -> 4: diff --git a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt index 93dc17c180..2c90c981fb 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt @@ -188,9 +188,9 @@ to: blueprint df68d4d4-5af4-4b56-95bb-1654a6957d4f oxp_427b2ccd-998f-4085-af21-e600604cf21e/crypt/zone/oxz_crucible_befe73dd-5970-49a4-9adf-7b4f453c45cf 95d72ef9-e070-49e4-a57b-2c392def6025 none none off oxp_2fa34d8e-13d9-42d3-b8ba-ca9d74ac496a/crypt/zone/oxz_crucible_d9106a19-f267-48db-a82b-004e643feb49 9b9fb14e-cd17-4a7a-a74a-bfd9c7682831 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_6c7f6a84-78b3-4dd9-878e-51bedfda471f aa190e01-9a4e-4131-9fcf-240532108c7f none none off - oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off + 
oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_internal_dns_0c42ad01-b854-4e7d-bd6c-25fdc3eddef4 1de9cde7-6c1e-4865-bd3d-378e22f62fb8 none none off - oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off + oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_7e763480-0f4f-43cb-ab9a-52b667d8fda5 5773e3b1-dde0-4b54-bc13-3c3bf816015e none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_ntp_f34f8d36-7137-48d3-9d13-6a46c4edcef4 c8c03dec-65d4-4c97-87c3-a43a8363c97c none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/debug f015e445-2e52-45c9-9f0a-49cb5ceae245 100 GiB none gzip-9 diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index f4f63ad96a..8bb7635d75 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -353,9 +353,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901/crypt/debug 2dfc5c53-6618-4352-b754-86ef6463c20a 100 GiB none gzip-9 oxp_e405da11-cb6b-4ebc-bac1-9bc997352e10/crypt/debug 61a653cf-44a6-43c0-90e1-bec539511703 100 GiB none gzip-9 oxp_f4d7f914-ec73-4b65-8696-5068591d9065/crypt/debug b803d901-7e43-42fa-8372-43c3c5b3c1a9 100 GiB none gzip-9 -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off ++ oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off ++ oxp_55196665-ed61-4b23-9a74-0711bf2eaf90/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off ++ oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off omicron zones generation 2 -> 3: @@ -443,9 +443,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cd62306a-aedf-47e8-93d5-92a358d64c7b/crypt/debug 73674f4b-1d93-404a-bc9c-8395efac97fd 100 GiB none gzip-9 oxp_f1693454-aac1-4265-b8a0-4e9f3f41c7b3/crypt/debug 938737fb-b72f-4727-8833-9697c518ca37 100 GiB none gzip-9 oxp_fe4fdfba-3b6d-47d3-8612-1fb2390b650a/crypt/debug 8e58b91f-9ce2-4256-8dec-5f90f31a73fa 100 GiB none gzip-9 -+ oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off -+ oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off -+ 
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off ++ oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off ++ oxp_60131a33-1f12-4dbb-9435-bdd368db1f51/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off ++ oxp_4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 72750bc8f0..9fe4b5218b 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -55,10 +55,10 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901/crypt/zone/oxz_crucible_c60379ba-4e30-4628-a79a-0ae509aef4c5 a6fcf496-70a1-49bf-a951-62fcec8dd5e2 none none off oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_crucible_f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 c596346d-4040-4103-b036-8fafdbaada00 none none off oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_crucible_f1a7b9a7-fc6a-4b23-b829-045ff33117ff c864de0d-9859-4ad1-a30b-f5ac45ba03ed none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off + oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off + oxp_55196665-ed61-4b23-9a74-0711bf2eaf90/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_a732c489-d29a-4f75-b900-5966385943af db6c139b-9028-4d8e-92c7-6cc1e9aa0131 none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off + oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_ntp_621509d6-3772-4009-aca1-35eefd1098fb 3b5822d2-9918-4bd6-8b75-2f52bdd73189 none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/debug bf9b39db-5a6a-4b45-b2da-c37425271014 100 GiB none gzip-9 oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/debug 1b4e8d9e-e447-4df1-8e0b-57edc318e8ad 100 GiB none gzip-9 @@ -145,10 +145,10 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cd62306a-aedf-47e8-93d5-92a358d64c7b/crypt/zone/oxz_crucible_be920398-024a-4655-8c49-69b5ac48dfff 87f757d6-fa4c-4423-995c-1eab5e7d09a2 none none off oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_crucible_d47f4996-fac0-4657-bcea-01b1fee6404d c1af262a-2595-4236-98c8-21c5b63c80c3 none none off oxp_789d607d-d196-428e-a988-f7886a327859/crypt/zone/oxz_crucible_e001fea0-6594-4ece-97e3-6198c293e931 5e27b9bc-e69f-4258-83f2-5f9a1109a625 none none off - 
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off
- oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off
+ oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off
+ oxp_60131a33-1f12-4dbb-9435-bdd368db1f51/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf 45d32c13-cbbb-4382-a0ed-dc6574b827b7 none none off
- oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off
+ oxp_4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_ntp_bf79a56a-97af-4cc4-94a5-8b20d64c2cda a410308c-e2cb-4e4d-9da6-1879336f93f2 none none off
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/debug 755e24a8-67cc-44b1-8c25-2dcb3acd988f 100 GiB none gzip-9
oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/debug c834f8cd-25ee-4c62-af03-49cef53fc4c1 100 GiB none gzip-9
diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs
index 806df52711..afe906e781 100644
--- a/nexus/types/src/deployment.rs
+++ b/nexus/types/src/deployment.rs
@@ -244,11 +244,13 @@ impl Blueprint {
pub fn all_omicron_datasets(
&self,
filter: BlueprintDatasetFilter,
- ) -> impl Iterator<Item = &BlueprintDatasetConfig> {
+ ) -> impl Iterator<Item = (SledUuid, &BlueprintDatasetConfig)> {
self.blueprint_datasets
.iter()
- .flat_map(move |(_, datasets)| datasets.datasets.values())
- .filter(move |d| d.disposition.matches(filter))
+ .flat_map(move |(sled_id, datasets)| {
+ datasets.datasets.values().map(|dataset| (*sled_id, dataset))
+ })
+ .filter(move |(_, d)| d.disposition.matches(filter))
}

/// Iterate over the [`BlueprintZoneConfig`] instances in the blueprint
@@ -630,7 +632,17 @@ fn zone_sort_key<T: ZoneSortKey>(z: &T) -> impl Ord {
/// Describes one Omicron-managed zone in a blueprint.
///
/// Part of [`BlueprintZonesConfig`].
-#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize)]
+#[derive(
+ Debug,
+ Clone,
+ Eq,
+ PartialEq,
+ Ord,
+ PartialOrd,
+ JsonSchema,
+ Deserialize,
+ Serialize,
+)]
pub struct BlueprintZoneConfig {
/// The disposition (desired state) of this zone recorded in the blueprint.
pub disposition: BlueprintZoneDisposition,
@@ -980,7 +992,17 @@ impl BlueprintDatasetDisposition {
}

/// Information about a dataset as recorded in a blueprint
-#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize)]
+#[derive(
+ Debug,
+ Clone,
+ Eq,
+ PartialEq,
+ Ord,
+ PartialOrd,
+ JsonSchema,
+ Deserialize,
+ Serialize,
+)]
pub struct BlueprintDatasetConfig {
// TODO: Display this in diffs - leave for now, for backwards compat
pub disposition: BlueprintDatasetDisposition,
diff --git a/nexus/types/src/deployment/network_resources.rs b/nexus/types/src/deployment/network_resources.rs
index cdabbc7fdc..f11d739d03 100644
--- a/nexus/types/src/deployment/network_resources.rs
+++ b/nexus/types/src/deployment/network_resources.rs
@@ -147,7 +147,18 @@ impl OmicronZoneNetworkResources {
}

/// External IP variants possible for Omicron-managed zones.
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[derive( + Debug, + Clone, + Copy, + Hash, + PartialOrd, + Ord, + PartialEq, + Eq, + Serialize, + Deserialize, +)] pub enum OmicronZoneExternalIp { Floating(OmicronZoneExternalFloatingIp), Snat(OmicronZoneExternalSnatIp), @@ -199,7 +210,17 @@ pub enum OmicronZoneExternalIpKey { /// necessary for blueprint planning, and requires that the zone have a single /// IP. #[derive( - Debug, Clone, Copy, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize, + Debug, + Clone, + Copy, + Hash, + PartialOrd, + Ord, + PartialEq, + Eq, + JsonSchema, + Serialize, + Deserialize, )] pub struct OmicronZoneExternalFloatingIp { pub id: ExternalIpUuid, @@ -208,7 +229,16 @@ pub struct OmicronZoneExternalFloatingIp { /// Floating external address with port allocated to an Omicron-managed zone. #[derive( - Debug, Clone, Copy, PartialEq, Eq, JsonSchema, Serialize, Deserialize, + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + JsonSchema, + Serialize, + Deserialize, )] pub struct OmicronZoneExternalFloatingAddr { pub id: ExternalIpUuid, @@ -227,7 +257,17 @@ impl OmicronZoneExternalFloatingAddr { /// necessary for blueprint planning, and requires that the zone have a single /// IP. #[derive( - Debug, Clone, Copy, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize, + Debug, + Clone, + Copy, + Hash, + PartialOrd, + Ord, + PartialEq, + Eq, + JsonSchema, + Serialize, + Deserialize, )] pub struct OmicronZoneExternalSnatIp { pub id: ExternalIpUuid, diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index 86310cfc8f..ffb4bd5a17 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -21,7 +21,17 @@ use serde::Serialize; use std::net::Ipv6Addr; use std::net::SocketAddrV6; -#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] +#[derive( + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, +)] #[serde(tag = "type", rename_all = "snake_case")] pub enum BlueprintZoneType { BoundaryNtp(blueprint_zone_type::BoundaryNtp), @@ -335,7 +345,15 @@ pub mod blueprint_zone_type { use std::net::SocketAddrV6; #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct BoundaryNtp { pub address: SocketAddrV6, @@ -349,7 +367,15 @@ pub mod blueprint_zone_type { /// Used in single-node clickhouse setups #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct Clickhouse { pub address: SocketAddrV6, @@ -357,7 +383,15 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct ClickhouseKeeper { pub address: SocketAddrV6, @@ -366,7 +400,15 @@ pub mod blueprint_zone_type { /// Used in replicated clickhouse setups #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct ClickhouseServer { pub address: SocketAddrV6, @@ -374,7 +416,15 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, 
Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct CockroachDb { pub address: SocketAddrV6, @@ -382,7 +432,15 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct Crucible { pub address: SocketAddrV6, @@ -390,14 +448,30 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct CruciblePantry { pub address: SocketAddrV6, } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct ExternalDns { pub dataset: OmicronZoneDataset, @@ -410,7 +484,15 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct InternalDns { pub dataset: OmicronZoneDataset, @@ -430,14 +512,30 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct InternalNtp { pub address: SocketAddrV6, } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct Nexus { /// The address at which the internal nexus server is reachable. @@ -453,7 +551,15 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, )] pub struct Oximeter { pub address: SocketAddrV6,