From 5fb9029a0f03a18969b41c5c7f96a35f35abfc2f Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 8 Aug 2024 16:10:12 -0700 Subject: [PATCH] Reconfigurator: Add support for boundary NTP zones (#6259) This is almost entirely planner work; no executor work was required, because it already supports "zones with external networking", and there's nothing special to boundary NTP beyond that from an execution point of view. I put this through a fairly thorough test on london; I'll put my notes from that in comments on the PR momentarily. This builds on #6050 and should be merged after it. --- common/src/address.rs | 3 + dev-tools/reconfigurator-cli/src/main.rs | 29 +- nexus/reconfigurator/execution/src/dns.rs | 5 +- .../planner/omicron_zone_placement.txt | 1 + .../planning/src/blueprint_builder/builder.rs | 247 +++++++++++++++--- .../blueprint_builder/external_networking.rs | 121 +++++++-- .../planning/src/blueprint_builder/zones.rs | 43 ++- nexus/reconfigurator/planning/src/planner.rs | 101 ++++--- .../src/planner/omicron_zone_placement.rs | 80 ++++-- nexus/reconfigurator/planning/src/system.rs | 11 +- nexus/reconfigurator/preparation/src/lib.rs | 4 + nexus/src/app/deployment.rs | 2 + nexus/types/src/deployment/planning_input.rs | 8 + sled-agent/src/rack_setup/plan/service.rs | 12 +- 14 files changed, 518 insertions(+), 149 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 44942a9854..5ed5689289 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -25,6 +25,9 @@ pub const MAX_PORT: u16 = u16::MAX; /// minimum possible value for a tcp or udp port pub const MIN_PORT: u16 = u16::MIN; +/// The amount of redundancy for boundary NTP servers. +pub const BOUNDARY_NTP_REDUNDANCY: usize = 2; + /// The amount of redundancy for Nexus services. 
/// /// This is used by both RSS (to distribute the initial set of services) and the diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 983dde412d..13e4617679 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -34,6 +34,7 @@ use omicron_common::api::external::Generation; use omicron_common::api::external::Name; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::VnicUuid; use reedline::{Reedline, Signal}; @@ -435,6 +436,8 @@ enum BlueprintEditCommands { }, /// add a CockroachDB instance to a particular sled AddCockroach { sled_id: SledUuid }, + /// expunge a particular zone from a particular sled + ExpungeZone { sled_id: SledUuid, zone_id: OmicronZoneUuid }, } #[derive(Debug, Args)] @@ -747,8 +750,8 @@ fn cmd_blueprint_edit( let label = match args.edit_command { BlueprintEditCommands::AddNexus { sled_id } => { - let current = - builder.sled_num_zones_of_kind(sled_id, ZoneKind::Nexus); + let current = builder + .sled_num_running_zones_of_kind(sled_id, ZoneKind::Nexus); let added = builder .sled_ensure_zone_multiple_nexus(sled_id, current + 1) .context("failed to add Nexus zone")?; @@ -759,8 +762,8 @@ fn cmd_blueprint_edit( format!("added Nexus zone to sled {}", sled_id) } BlueprintEditCommands::AddCockroach { sled_id } => { - let current = - builder.sled_num_zones_of_kind(sled_id, ZoneKind::CockroachDb); + let current = builder + .sled_num_running_zones_of_kind(sled_id, ZoneKind::CockroachDb); let added = builder .sled_ensure_zone_multiple_cockroachdb(sled_id, current + 1) .context("failed to add CockroachDB zone")?; @@ -770,9 +773,25 @@ fn cmd_blueprint_edit( ); format!("added CockroachDB zone to sled {}", sled_id) } + BlueprintEditCommands::ExpungeZone { sled_id, zone_id } => { + builder + .sled_expunge_zone(sled_id, zone_id) + .context("failed to expunge zone")?; + format!("expunged zone {zone_id} from sled {sled_id}") + } }; - let new_blueprint = builder.build(); + let mut new_blueprint = builder.build(); + + // Normally `builder.build()` would construct the cockroach fingerprint + // based on what we read from CRDB and put into the planning input, but + // since we don't have a CRDB we had to make something up for our planning + // input's CRDB fingerprint. In the absence of a better alternative, we'll + // just copy our parent's CRDB fingerprint and carry it forward. 
+ new_blueprint + .cockroachdb_fingerprint + .clone_from(&blueprint.cockroachdb_fingerprint); + let rv = format!( "blueprint {} created from blueprint {}: {}", new_blueprint.id, blueprint_id, label diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 690a4348b0..8bcae27bc0 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -499,6 +499,7 @@ mod test { use omicron_common::address::get_switch_zone_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; + use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; use omicron_common::address::COCKROACHDB_REDUNDANCY; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; @@ -1313,6 +1314,7 @@ mod test { cockroachdb_settings: &CockroachDbSettings::empty(), external_ip_rows: &[], service_nic_rows: &[], + target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, target_nexus_zone_count: NEXUS_REDUNDANCY, target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, target_cockroachdb_cluster_version: @@ -1340,7 +1342,8 @@ mod test { .unwrap(); let sled_id = blueprint.sleds().next().expect("expected at least one sled"); - let nalready = builder.sled_num_zones_of_kind(sled_id, ZoneKind::Nexus); + let nalready = + builder.sled_num_running_zones_of_kind(sled_id, ZoneKind::Nexus); let rv = builder .sled_ensure_zone_multiple_nexus(sled_id, nalready + 1) .unwrap(); diff --git a/nexus/reconfigurator/planning/proptest-regressions/planner/omicron_zone_placement.txt b/nexus/reconfigurator/planning/proptest-regressions/planner/omicron_zone_placement.txt index bb2ad481bc..17ae1771d1 100644 --- a/nexus/reconfigurator/planning/proptest-regressions/planner/omicron_zone_placement.txt +++ b/nexus/reconfigurator/planning/proptest-regressions/planner/omicron_zone_placement.txt @@ -5,3 +5,4 @@ # It is recommended to check this file in to source control so that # everyone who runs the test benefits from these saved cases. 
cc 72b902d1405681df2dd46efc097da6840ff1234dc9d0d7c0ecf07bed0b0e7d8d # shrinks to input = _TestPlaceOmicronZonesArgs { input: ArbitraryTestInput { existing_sleds: {[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]: ExistingSled { zones: ZonesToPlace { zones: [] }, waiting_for_ntp: false, num_disks: 1 }}, zones_to_place: ZonesToPlace { zones: [Nexus] } } } +cc d725ad7fd51d0409c2f24088730159c1c3043a7675d46b966e45cb86b570a141 # shrinks to input = _TestPlaceOmicronZonesArgs { input: ArbitraryTestInput { existing_sleds: {[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]: ExistingSled { zones: ZonesToPlace { zones: [] }, num_zpools: 2 }}, zones_to_place: ZonesToPlace { zones: [BoundaryNtp, BoundaryNtp] } } } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 09ae4132f3..2d8a7c9598 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -26,6 +26,7 @@ use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::DiskFilter; use nexus_types::deployment::OmicronZoneExternalFloatingIp; +use nexus_types::deployment::OmicronZoneExternalSnatIp; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledFilter; @@ -71,6 +72,7 @@ use typed_rng::UuidRng; use super::external_networking::BuilderExternalNetworking; use super::external_networking::ExternalNetworkingChoice; +use super::external_networking::ExternalSnatNetworkingChoice; use super::zones::is_already_expunged; use super::zones::BuilderZoneState; use super::zones::BuilderZonesConfig; @@ -86,12 +88,14 @@ pub enum Error { NoAvailableZpool { sled_id: SledUuid, kind: ZoneKind }, #[error("no Nexus zones exist in parent blueprint")] NoNexusZonesInParentBlueprint, + #[error("no Boundary NTP zones exist in parent blueprint")] + NoBoundaryNtpZonesInParentBlueprint, #[error("no external service IP addresses are available")] NoExternalServiceIpAvailable, #[error("no system MAC addresses are available")] NoSystemMacAddressAvailable, - #[error("exhausted available Nexus IP addresses")] - ExhaustedNexusIps, + #[error("exhausted available OPTE IP addresses for service {kind:?}")] + ExhaustedOpteIps { kind: ZoneKind }, #[error( "invariant violation: found decommissioned sled with \ {num_zones} non-expunged zones: {sled_id}" @@ -101,7 +105,7 @@ pub enum Error { num_zones: usize, }, #[error("programming error in planner")] - Planner(#[from] anyhow::Error), + Planner(#[source] anyhow::Error), } /// Describes whether an idempotent "ensure" operation resulted in action taken @@ -341,8 +345,9 @@ impl<'a> BlueprintBuilder<'a> { pub fn current_sled_zones( &self, sled_id: SledUuid, + filter: BlueprintZoneFilter, ) -> impl Iterator { - self.zones.current_sled_zones(sled_id).map(|(config, _)| config) + self.zones.current_sled_zones(sled_id, filter).map(|(config, _)| config) } /// Assemble a final [`Blueprint`] based on the contents of the builder @@ -432,7 +437,8 @@ impl<'a> BlueprintBuilder<'a> { // Do any zones need to be marked expunged? 
let mut zones_to_expunge = BTreeMap::new(); - let sled_zones = self.zones.current_sled_zones(sled_id); + let sled_zones = + self.zones.current_sled_zones(sled_id, BlueprintZoneFilter::All); for (zone_config, state) in sled_zones { let zone_id = zone_config.id; let log = log.new(o!( @@ -498,9 +504,9 @@ impl<'a> BlueprintBuilder<'a> { change .expunge_zones(zones_to_expunge.keys().cloned().collect()) .map_err(|error| { - anyhow!(error).context(format!( + Error::Planner(anyhow!(error).context(format!( "for sled {sled_id}, error expunging zones" - )) + ))) })?; // Finally, add comments describing what happened. @@ -620,7 +626,7 @@ impl<'a> BlueprintBuilder<'a> { // If there's already an NTP zone on this sled, do nothing. let has_ntp = self .zones - .current_sled_zones(sled_id) + .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) .any(|(z, _)| z.zone_type.is_ntp()); if has_ntp { return Ok(Ensure::NotNeeded); @@ -687,8 +693,10 @@ impl<'a> BlueprintBuilder<'a> { let pool_name = ZpoolName::new_external(zpool_id); // If this sled already has a Crucible zone on this pool, do nothing. - let has_crucible_on_this_pool = - self.zones.current_sled_zones(sled_id).any(|(z, _)| { + let has_crucible_on_this_pool = self + .zones + .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) + .any(|(z, _)| { matches!( &z.zone_type, BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { @@ -739,13 +747,13 @@ impl<'a> BlueprintBuilder<'a> { /// /// This value may change before a blueprint is actually generated if /// further changes are made to the builder. - pub fn sled_num_zones_of_kind( + pub fn sled_num_running_zones_of_kind( &self, sled_id: SledUuid, kind: ZoneKind, ) -> usize { self.zones - .current_sled_zones(sled_id) + .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) .filter(|(z, _)| z.zone_type.kind() == kind) .count() } @@ -793,7 +801,8 @@ impl<'a> BlueprintBuilder<'a> { external_dns_servers: Vec, ) -> Result { // How many Nexus zones do we need to add? - let nexus_count = self.sled_num_zones_of_kind(sled_id, ZoneKind::Nexus); + let nexus_count = + self.sled_num_running_zones_of_kind(sled_id, ZoneKind::Nexus); let num_nexus_to_add = match desired_zone_count.checked_sub(nexus_count) { Some(0) => return Ok(EnsureMultiple::NotNeeded), @@ -820,21 +829,19 @@ impl<'a> BlueprintBuilder<'a> { ip: external_ip, }; - let nic = { - NetworkInterface { - id: self.rng.network_interface_rng.next(), - kind: NetworkInterfaceKind::Service { - id: nexus_id.into_untyped_uuid(), - }, - name: format!("nexus-{nexus_id}").parse().unwrap(), - ip: nic_ip, - mac: nic_mac, - subnet: nic_subnet, - vni: Vni::SERVICES_VNI, - primary: true, - slot: 0, - transit_ips: vec![], - } + let nic = NetworkInterface { + id: self.rng.network_interface_rng.next(), + kind: NetworkInterfaceKind::Service { + id: nexus_id.into_untyped_uuid(), + }, + name: format!("nexus-{nexus_id}").parse().unwrap(), + ip: nic_ip, + mac: nic_mac, + subnet: nic_subnet, + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + transit_ips: vec![], }; let ip = self.sled_alloc_ip(sled_id)?; @@ -878,7 +885,7 @@ impl<'a> BlueprintBuilder<'a> { ) -> Result { // How many CRDB zones do we need to add? 
let crdb_count = - self.sled_num_zones_of_kind(sled_id, ZoneKind::CockroachDb); + self.sled_num_running_zones_of_kind(sled_id, ZoneKind::CockroachDb); let num_crdb_to_add = match desired_zone_count.checked_sub(crdb_count) { Some(0) => return Ok(EnsureMultiple::NotNeeded), Some(n) => n, @@ -920,6 +927,157 @@ impl<'a> BlueprintBuilder<'a> { Ok(EnsureMultiple::Changed { added: num_crdb_to_add, removed: 0 }) } + pub fn sled_promote_internal_ntp_to_boundary_ntp( + &mut self, + sled_id: SledUuid, + ) -> Result { + // The upstream NTP/DNS servers and domain _should_ come from Nexus and + // be modifiable by the operator, but currently can only be set at RSS. + // We can only promote a new boundary NTP zone by copying these settings + // from an existing one. + let (ntp_servers, dns_servers, domain) = self + .parent_blueprint + .all_omicron_zones(BlueprintZoneFilter::All) + .find_map(|(_, z)| match &z.zone_type { + BlueprintZoneType::BoundaryNtp(zone_config) => Some(( + zone_config.ntp_servers.clone(), + zone_config.dns_servers.clone(), + zone_config.domain.clone(), + )), + _ => None, + }) + .ok_or(Error::NoBoundaryNtpZonesInParentBlueprint)?; + + self.sled_promote_internal_ntp_to_boundary_ntp_with_config( + sled_id, + ntp_servers, + dns_servers, + domain, + ) + } + + pub fn sled_promote_internal_ntp_to_boundary_ntp_with_config( + &mut self, + sled_id: SledUuid, + ntp_servers: Vec, + dns_servers: Vec, + domain: Option, + ) -> Result { + // Check the sled id and return an appropriate error if it's invalid. + let _ = self.sled_resources(sled_id)?; + + let sled_zones = self.zones.change_sled_zones(sled_id); + + // Find the internal NTP zone and expunge it. + let mut internal_ntp_zone_id_iter = sled_zones + .iter_zones(BlueprintZoneFilter::ShouldBeRunning) + .filter_map(|config| { + if matches!( + config.zone().zone_type, + BlueprintZoneType::InternalNtp(_) + ) { + Some(config.zone().id) + } else { + None + } + }); + + // We should have exactly one internal NTP zone. + let internal_ntp_zone_id = + internal_ntp_zone_id_iter.next().ok_or_else(|| { + Error::Planner(anyhow!( + "cannot promote internal NTP zone on sled {sled_id}: \ + no internal NTP zone found" + )) + })?; + if internal_ntp_zone_id_iter.next().is_some() { + return Err(Error::Planner(anyhow!( + "sled {sled_id} has multiple internal NTP zones" + ))); + } + std::mem::drop(internal_ntp_zone_id_iter); + + // Expunge the internal NTP zone. + sled_zones.expunge_zone(internal_ntp_zone_id).map_err(|error| { + Error::Planner(anyhow!(error).context(format!( + "error expunging internal NTP zone from sled {sled_id}" + ))) + })?; + + // Add the new boundary NTP zone. 
+ let new_zone_id = self.rng.zone_rng.next(); + let ExternalSnatNetworkingChoice { + snat_cfg, + nic_ip, + nic_subnet, + nic_mac, + } = self.external_networking.for_new_boundary_ntp()?; + let external_ip = OmicronZoneExternalSnatIp { + id: self.rng.external_ip_rng.next(), + snat_cfg, + }; + let nic = NetworkInterface { + id: self.rng.network_interface_rng.next(), + kind: NetworkInterfaceKind::Service { + id: new_zone_id.into_untyped_uuid(), + }, + name: format!("ntp-{new_zone_id}").parse().unwrap(), + ip: nic_ip, + mac: nic_mac, + subnet: nic_subnet, + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + transit_ips: vec![], + }; + + let underlay_ip = self.sled_alloc_ip(sled_id)?; + let port = omicron_common::address::NTP_PORT; + let zone_type = + BlueprintZoneType::BoundaryNtp(blueprint_zone_type::BoundaryNtp { + address: SocketAddrV6::new(underlay_ip, port, 0, 0), + ntp_servers, + dns_servers, + domain, + nic, + external_ip, + }); + let filesystem_pool = + self.sled_select_zpool(sled_id, zone_type.kind())?; + + self.sled_add_zone( + sled_id, + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: new_zone_id, + underlay_address: underlay_ip, + filesystem_pool: Some(filesystem_pool), + zone_type, + }, + )?; + + Ok(EnsureMultiple::Changed { added: 1, removed: 1 }) + } + + pub fn sled_expunge_zone( + &mut self, + sled_id: SledUuid, + zone_id: OmicronZoneUuid, + ) -> Result<(), Error> { + // Check the sled id and return an appropriate error if it's invalid. + let _ = self.sled_resources(sled_id)?; + + let sled_zones = self.zones.change_sled_zones(sled_id); + sled_zones.expunge_zone(zone_id).map_err(|error| { + Error::Planner( + anyhow!(error) + .context(format!("failed to expunge zone from sled {sled_id}")), + ) + })?; + + Ok(()) + } + fn sled_add_zone( &mut self, sled_id: SledUuid, @@ -930,8 +1088,10 @@ impl<'a> BlueprintBuilder<'a> { let sled_zones = self.zones.change_sled_zones(sled_id); sled_zones.add_zone(zone).map_err(|error| { - anyhow!(error) - .context(format!("error adding zone to sled {sled_id}")) + Error::Planner( + anyhow!(error) + .context(format!("error adding zone to sled {sled_id}")), + ) })?; Ok(()) @@ -966,7 +1126,10 @@ impl<'a> BlueprintBuilder<'a> { // Record each of the sled's zones' underlay addresses as // allocated. - for (z, _) in self.zones.current_sled_zones(sled_id) { + for (z, _) in self + .zones + .current_sled_zones(sled_id, BlueprintZoneFilter::All) + { allocator.reserve(z.underlay_address); } @@ -995,7 +1158,9 @@ impl<'a> BlueprintBuilder<'a> { // sled already has a durable zone of that kind on the same zpool. Build // up a set of invalid zpools for this sled/kind pair. 
let mut skip_zpools = BTreeSet::new(); - for zone_config in self.current_sled_zones(sled_id) { + for zone_config in self + .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) + { if let Some(zpool) = zone_config.zone_type.durable_zpool() { if zone_kind == zone_config.zone_type.kind() { skip_zpools.insert(zpool); @@ -1124,17 +1289,21 @@ impl<'a> BlueprintZonesBuilder<'a> { pub fn current_sled_zones( &self, sled_id: SledUuid, + filter: BlueprintZoneFilter, ) -> Box + '_> { if let Some(sled_zones) = self.changed_zones.get(&sled_id) { - Box::new(sled_zones.iter_zones().map(|z| (z.zone(), z.state()))) - } else if let Some(parent_zones) = self.parent_zones.get(&sled_id) { Box::new( - parent_zones - .zones - .iter() - .map(|z| (z, BuilderZoneState::Unchanged)), + sled_zones.iter_zones(filter).map(|z| (z.zone(), z.state())), ) + } else if let Some(parent_zones) = self.parent_zones.get(&sled_id) { + Box::new(parent_zones.zones.iter().filter_map(move |z| { + if z.disposition.matches(filter) { + Some((z, BuilderZoneState::Unchanged)) + } else { + None + } + })) } else { Box::new(std::iter::empty()) } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs index 3326bfdbe5..93c845add5 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs @@ -6,11 +6,13 @@ use super::Error; use anyhow::bail; use debug_ignore::DebugIgnore; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; +use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::OmicronZoneExternalIp; use nexus_types::deployment::PlanningInput; +use nexus_types::inventory::SourceNatConfig; use omicron_common::address::IpRange; use omicron_common::address::DNS_OPTE_IPV4_SUBNET; use omicron_common::address::DNS_OPTE_IPV6_SUBNET; @@ -20,7 +22,9 @@ use omicron_common::address::NTP_OPTE_IPV4_SUBNET; use omicron_common::address::NTP_OPTE_IPV6_SUBNET; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::MacAddr; +use omicron_common::api::internal::shared::SourceNatConfigError; use oxnet::IpNet; +use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashSet; @@ -28,22 +32,13 @@ use std::hash::Hash; use std::net::IpAddr; use std::net::Ipv4Addr; use std::net::Ipv6Addr; - -// These imports are currently only used `#[cfg(test)]` methods, but those -// methods will become non-test-only once we support boundary NTP zone -// allocation. -#[cfg(test)] -use nexus_types::inventory::SourceNatConfig; -#[cfg(test)] -use omicron_common::api::internal::shared::SourceNatConfigError; -#[cfg(test)] -use std::collections::btree_map::Entry; -#[cfg(test)] use strum::IntoEnumIterator as _; #[derive(Debug)] pub(super) struct BuilderExternalNetworking<'a> { // These fields mirror how RSS chooses addresses for zone NICs. 
+ boundary_ntp_v4_ips: AvailableIterator<'static, Ipv4Addr>, + boundary_ntp_v6_ips: AvailableIterator<'static, Ipv6Addr>, nexus_v4_ips: AvailableIterator<'static, Ipv4Addr>, nexus_v6_ips: AvailableIterator<'static, Ipv6Addr>, @@ -100,6 +95,10 @@ impl<'a> BuilderExternalNetworking<'a> { let mut existing_nexus_v4_ips: HashSet = HashSet::new(); let mut existing_nexus_v6_ips: HashSet = HashSet::new(); + let mut existing_boundary_ntp_v4_ips: HashSet = + HashSet::new(); + let mut existing_boundary_ntp_v6_ips: HashSet = + HashSet::new(); let mut external_ip_alloc = ExternalIpAllocator::new(input.service_ip_pool_ranges()); let mut used_macs: HashSet = HashSet::new(); @@ -108,8 +107,20 @@ impl<'a> BuilderExternalNetworking<'a> { parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) { let zone_type = &z.zone_type; - if let BlueprintZoneType::Nexus(nexus) = zone_type { - match nexus.nic.ip { + match zone_type { + BlueprintZoneType::BoundaryNtp(ntp) => match ntp.nic.ip { + IpAddr::V4(ip) => { + if !existing_boundary_ntp_v4_ips.insert(ip) { + bail!("duplicate Boundary NTP NIC IP: {ip}"); + } + } + IpAddr::V6(ip) => { + if !existing_boundary_ntp_v6_ips.insert(ip) { + bail!("duplicate Boundary NTP NIC IP: {ip}"); + } + } + }, + BlueprintZoneType::Nexus(nexus) => match nexus.nic.ip { IpAddr::V4(ip) => { if !existing_nexus_v4_ips.insert(ip) { bail!("duplicate Nexus NIC IP: {ip}"); @@ -120,7 +131,8 @@ impl<'a> BuilderExternalNetworking<'a> { bail!("duplicate Nexus NIC IP: {ip}"); } } - } + }, + _ => (), } if let Some((external_ip, nic)) = zone_type.external_networking() { @@ -171,7 +183,12 @@ impl<'a> BuilderExternalNetworking<'a> { } } IpAddr::V4(ip) if NTP_OPTE_IPV4_SUBNET.contains(ip) => { - // TODO check existing_ntp_v4_ips, once it exists + if !existing_boundary_ntp_v4_ips.contains(&ip) { + bail!( + "planning input contains unexpected NIC \ + (IP not found in parent blueprint): {nic_entry:?}" + ); + } } IpAddr::V4(ip) if DNS_OPTE_IPV4_SUBNET.contains(ip) => { // TODO check existing_dns_v4_ips, once it exists @@ -185,7 +202,12 @@ impl<'a> BuilderExternalNetworking<'a> { } } IpAddr::V6(ip) if NTP_OPTE_IPV6_SUBNET.contains(ip) => { - // TODO check existing_ntp_v6_ips, once it exists + if !existing_boundary_ntp_v6_ips.contains(&ip) { + bail!( + "planning input contains unexpected NIC \ + (IP not found in parent blueprint): {nic_entry:?}" + ); + } } IpAddr::V6(ip) if DNS_OPTE_IPV6_SUBNET.contains(ip) => { // TODO check existing_dns_v6_ips, once it exists @@ -217,10 +239,22 @@ impl<'a> BuilderExternalNetworking<'a> { .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), existing_nexus_v6_ips, ); + let boundary_ntp_v4_ips = AvailableIterator::new( + NTP_OPTE_IPV4_SUBNET + .addr_iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), + existing_boundary_ntp_v4_ips, + ); + let boundary_ntp_v6_ips = AvailableIterator::new( + NTP_OPTE_IPV6_SUBNET.iter().skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), + existing_boundary_ntp_v6_ips, + ); let available_system_macs = AvailableIterator::new(MacAddr::iter_system(), used_macs); Ok(Self { + boundary_ntp_v4_ips, + boundary_ntp_v6_ips, nexus_v4_ips, nexus_v6_ips, external_ip_alloc, @@ -236,14 +270,14 @@ impl<'a> BuilderExternalNetworking<'a> { IpAddr::V4(_) => ( self.nexus_v4_ips .next() - .ok_or(Error::ExhaustedNexusIps)? + .ok_or(Error::ExhaustedOpteIps { kind: ZoneKind::Nexus })? .into(), IpNet::from(*NEXUS_OPTE_IPV4_SUBNET), ), IpAddr::V6(_) => ( self.nexus_v6_ips .next() - .ok_or(Error::ExhaustedNexusIps)? + .ok_or(Error::ExhaustedOpteIps { kind: ZoneKind::Nexus })? 
.into(), IpNet::from(*NEXUS_OPTE_IPV6_SUBNET), ), @@ -260,6 +294,43 @@ impl<'a> BuilderExternalNetworking<'a> { nic_mac, }) } + + pub(super) fn for_new_boundary_ntp( + &mut self, + ) -> Result { + let snat_cfg = self.external_ip_alloc.claim_next_snat_ip()?; + let (nic_ip, nic_subnet) = match snat_cfg.ip { + IpAddr::V4(_) => ( + self.boundary_ntp_v4_ips + .next() + .ok_or(Error::ExhaustedOpteIps { + kind: ZoneKind::BoundaryNtp, + })? + .into(), + IpNet::from(*NTP_OPTE_IPV4_SUBNET), + ), + IpAddr::V6(_) => ( + self.boundary_ntp_v6_ips + .next() + .ok_or(Error::ExhaustedOpteIps { + kind: ZoneKind::BoundaryNtp, + })? + .into(), + IpNet::from(*NTP_OPTE_IPV6_SUBNET), + ), + }; + let nic_mac = self + .available_system_macs + .next() + .ok_or(Error::NoSystemMacAddressAvailable)?; + + Ok(ExternalSnatNetworkingChoice { + snat_cfg, + nic_ip, + nic_subnet, + nic_mac, + }) + } } #[derive(Debug, Clone, Copy)] @@ -270,6 +341,14 @@ pub(super) struct ExternalNetworkingChoice { pub(super) nic_mac: MacAddr, } +#[derive(Debug, Clone, Copy)] +pub(super) struct ExternalSnatNetworkingChoice { + pub(super) snat_cfg: SourceNatConfig, + pub(super) nic_ip: IpAddr, + pub(super) nic_subnet: IpNet, + pub(super) nic_mac: MacAddr, +} + /// Combines a base iterator with an `in_use` set, filtering out any elements /// that are in the "in_use" set. /// @@ -407,9 +486,6 @@ impl<'a> ExternalIpAllocator<'a> { Err(Error::NoExternalServiceIpAvailable) } - // This is currently only used by a unit test, but will be used by real code - // once we support boundary NTP zone allocation. - #[cfg(test)] fn claim_next_snat_ip(&mut self) -> Result { // Prefer reusing an existing SNAT IP, if we still have port ranges // available on that ip. @@ -453,9 +529,6 @@ enum SnatPortRange { } impl SnatPortRange { - // This is currently only used by a unit test, but will be used by real code - // once we support boundary NTP zone allocation. - #[cfg(test)] fn into_source_nat_config(self, ip: IpAddr) -> SourceNatConfig { let first = match self { SnatPortRange::One => 0, diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs index 6cb76539ec..1413dfec19 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs @@ -5,7 +5,8 @@ use std::collections::BTreeSet; use nexus_types::deployment::{ - BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZonesConfig, + BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZoneFilter, + BlueprintZonesConfig, }; use omicron_common::api::external::Generation; use omicron_uuid_kinds::OmicronZoneUuid; @@ -71,6 +72,31 @@ impl BuilderZonesConfig { Ok(()) } + pub(super) fn expunge_zone( + &mut self, + zone_id: OmicronZoneUuid, + ) -> Result<(), BuilderZonesConfigError> { + let zone = self + .zones + .iter_mut() + .find(|zone| zone.zone.id == zone_id) + .ok_or_else(|| { + let mut unmatched = BTreeSet::new(); + unmatched.insert(zone_id); + BuilderZonesConfigError::ExpungeUnmatchedZones { unmatched } + })?; + + // Check that the zone is expungeable. Typically, zones passed + // in here should have had this check done to them already, but + // in case they're not, or in case something else about those + // zones changed in between, check again. 
+ is_already_expunged(&zone.zone, zone.state)?; + zone.zone.disposition = BlueprintZoneDisposition::Expunged; + zone.state = BuilderZoneState::Modified; + + Ok(()) + } + pub(super) fn expunge_zones( &mut self, mut zones: BTreeSet, @@ -100,8 +126,9 @@ impl BuilderZonesConfig { pub(super) fn iter_zones( &self, + filter: BlueprintZoneFilter, ) -> impl Iterator { - self.zones.iter() + self.zones.iter().filter(move |z| z.zone().disposition.matches(filter)) } pub(super) fn build(self) -> BlueprintZonesConfig { @@ -279,7 +306,10 @@ mod tests { // Iterate over the zones for the sled and ensure that the NTP zone is // present. { - let mut zones = builder.zones.current_sled_zones(new_sled_id); + let mut zones = builder.zones.current_sled_zones( + new_sled_id, + BlueprintZoneFilter::ShouldBeRunning, + ); let (_, state) = zones.next().expect("exactly one zone for sled"); assert!(zones.next().is_none(), "exactly one zone for sled"); assert_eq!( @@ -323,7 +353,7 @@ mod tests { // Attempt to expunge one of the other zones on the sled. let existing_zone_id = change - .iter_zones() + .iter_zones(BlueprintZoneFilter::ShouldBeRunning) .find(|z| z.zone.id != new_zone_id) .expect("at least one existing zone") .zone @@ -352,7 +382,10 @@ mod tests { { // Iterate over the zones and ensure that the Oximeter zone is // present, and marked added. - let mut zones = builder.zones.current_sled_zones(existing_sled_id); + let mut zones = builder.zones.current_sled_zones( + existing_sled_id, + BlueprintZoneFilter::ShouldBeRunning, + ); zones .find_map(|(z, state)| { if z.id == new_zone_id { diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 509c6722cb..3bd1b8757e 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -16,6 +16,7 @@ use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; +use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::CockroachDbSettings; @@ -144,8 +145,10 @@ impl<'a> Planner<'a> { // Check 2: have all this sled's zones been expunged? It's possible // we ourselves have made this change, which is fine. - let all_zones_expunged = - self.blueprint.current_sled_zones(sled_id).all(|zone| { + let all_zones_expunged = self + .blueprint + .current_sled_zones(sled_id, BlueprintZoneFilter::All) + .all(|zone| { zone.disposition == BlueprintZoneDisposition::Expunged }); @@ -187,7 +190,7 @@ impl<'a> Planner<'a> { if !commissioned_sled_ids.contains(&sled_id) { let num_zones = self .blueprint - .current_sled_zones(sled_id) + .current_sled_zones(sled_id, BlueprintZoneFilter::All) .filter(|zone| { zone.disposition != BlueprintZoneDisposition::Expunged }) @@ -351,8 +354,9 @@ impl<'a> Planner<'a> { let mut zone_placement = None; for zone_kind in [ - DiscretionaryOmicronZone::Nexus, + DiscretionaryOmicronZone::BoundaryNtp, DiscretionaryOmicronZone::CockroachDb, + DiscretionaryOmicronZone::Nexus, ] { let num_zones_to_add = self.num_additional_zones_needed(zone_kind); if num_zones_to_add == 0 { @@ -361,29 +365,30 @@ impl<'a> Planner<'a> { // We need to add at least one zone; construct our `zone_placement` // (or reuse the existing one if a previous loop iteration already // created it). 
- let zone_placement = match zone_placement.as_mut() { - Some(zone_placement) => zone_placement, - None => { - // This constructs a picture of the sleds as we currently - // understand them, as far as which sleds have discretionary - // zones. This will remain valid as we loop through the - // `zone_kind`s in this function, as any zone additions will - // update the `zone_placement` heap in-place. - let current_discretionary_zones = self - .input - .all_sled_resources(SledFilter::Discretionary) - .filter(|(sled_id, _)| { - !sleds_waiting_for_ntp_zone.contains(&sled_id) - }) - .map(|(sled_id, sled_resources)| { - OmicronZonePlacementSledState { + let zone_placement = zone_placement.get_or_insert_with(|| { + // This constructs a picture of the sleds as we currently + // understand them, as far as which sleds have discretionary + // zones. This will remain valid as we loop through the + // `zone_kind`s in this function, as any zone additions will + // update the `zone_placement` heap in-place. + let current_discretionary_zones = self + .input + .all_sled_resources(SledFilter::Discretionary) + .filter(|(sled_id, _)| { + !sleds_waiting_for_ntp_zone.contains(&sled_id) + }) + .map(|(sled_id, sled_resources)| { + OmicronZonePlacementSledState { sled_id, num_zpools: sled_resources .all_zpools(ZpoolFilter::InService) .count(), discretionary_zones: self .blueprint - .current_sled_zones(sled_id) + .current_sled_zones( + sled_id, + BlueprintZoneFilter::ShouldBeRunning, + ) .filter_map(|zone| { DiscretionaryOmicronZone::from_zone_type( &zone.zone_type, @@ -391,12 +396,9 @@ impl<'a> Planner<'a> { }) .collect(), } - }); - zone_placement.insert(OmicronZonePlacement::new( - current_discretionary_zones, - )) - } - }; + }); + OmicronZonePlacement::new(current_discretionary_zones) + }); self.add_discretionary_zones( zone_placement, zone_kind, @@ -421,17 +423,20 @@ impl<'a> Planner<'a> { for sled_id in self.input.all_sled_ids(SledFilter::InService) { let num_zones_of_kind = self .blueprint - .sled_num_zones_of_kind(sled_id, zone_kind.into()); + .sled_num_running_zones_of_kind(sled_id, zone_kind.into()); num_existing_kind_zones += num_zones_of_kind; } let target_count = match zone_kind { - DiscretionaryOmicronZone::Nexus => { - self.input.target_nexus_zone_count() + DiscretionaryOmicronZone::BoundaryNtp => { + self.input.target_boundary_ntp_zone_count() } DiscretionaryOmicronZone::CockroachDb => { self.input.target_cockroachdb_zone_count() } + DiscretionaryOmicronZone::Nexus => { + self.input.target_nexus_zone_count() + } }; // TODO-correctness What should we do if we have _too many_ @@ -496,29 +501,36 @@ impl<'a> Planner<'a> { // total zones go on a given sled, but we have a count of how many // we want to add. Construct a new target count. Maybe the builder // should provide a different interface here? - let new_total_zone_count = - self.blueprint.sled_num_zones_of_kind(sled_id, kind.into()) - + additional_zone_count; + let new_total_zone_count = self + .blueprint + .sled_num_running_zones_of_kind(sled_id, kind.into()) + + additional_zone_count; let result = match kind { - DiscretionaryOmicronZone::Nexus => { - self.blueprint.sled_ensure_zone_multiple_nexus( + DiscretionaryOmicronZone::BoundaryNtp => self + .blueprint + .sled_promote_internal_ntp_to_boundary_ntp(sled_id)?, + DiscretionaryOmicronZone::CockroachDb => { + self.blueprint.sled_ensure_zone_multiple_cockroachdb( sled_id, new_total_zone_count, )? 
} - DiscretionaryOmicronZone::CockroachDb => { - self.blueprint.sled_ensure_zone_multiple_cockroachdb( + DiscretionaryOmicronZone::Nexus => { + self.blueprint.sled_ensure_zone_multiple_nexus( sled_id, new_total_zone_count, )? } }; match result { - EnsureMultiple::Changed { added, removed: _ } => { + EnsureMultiple::Changed { added, removed } => { info!( - self.log, "will add {added} Nexus zone(s) to sled"; + self.log, "modified zones on sled"; "sled_id" => %sled_id, + "kind" => ?kind, + "added" => added, + "removed" => removed, ); new_zones_added += added; } @@ -1389,11 +1401,18 @@ mod test { assert_eq!(diff.sleds_removed.len(), 0); assert_eq!(diff.sleds_modified.len(), 1); - // We should be removing all zones using this zpool - assert_eq!(diff.zones.added.len(), 0); + // We should be removing all zones using this zpool. Because we're + // removing the NTP zone, we should add a new one. + assert_eq!(diff.zones.added.len(), 1); assert_eq!(diff.zones.removed.len(), 0); assert_eq!(diff.zones.modified.len(), 1); + let (_zone_id, added_zones) = diff.zones.added.iter().next().unwrap(); + assert_eq!(added_zones.zones.len(), 1); + for zone in &added_zones.zones { + assert_eq!(zone.kind(), ZoneKind::InternalNtp); + } + let (_zone_id, modified_zones) = diff.zones.modified.iter().next().unwrap(); assert_eq!(modified_zones.zones.len(), zones_using_zpool); diff --git a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs index dcfb3b3150..c08f30124c 100644 --- a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs +++ b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs @@ -14,8 +14,9 @@ use std::mem; #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[cfg_attr(test, derive(test_strategy::Arbitrary))] pub(crate) enum DiscretionaryOmicronZone { - Nexus, + BoundaryNtp, CockroachDb, + Nexus, // TODO expand this enum as we start to place more services } @@ -24,11 +25,11 @@ impl DiscretionaryOmicronZone { zone_type: &BlueprintZoneType, ) -> Option { match zone_type { - BlueprintZoneType::Nexus(_) => Some(Self::Nexus), + BlueprintZoneType::BoundaryNtp(_) => Some(Self::BoundaryNtp), BlueprintZoneType::CockroachDb(_) => Some(Self::CockroachDb), + BlueprintZoneType::Nexus(_) => Some(Self::Nexus), // Zones that we should place but don't yet. - BlueprintZoneType::BoundaryNtp(_) - | BlueprintZoneType::Clickhouse(_) + BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) @@ -46,8 +47,9 @@ impl DiscretionaryOmicronZone { impl From for ZoneKind { fn from(zone: DiscretionaryOmicronZone) -> Self { match zone { - DiscretionaryOmicronZone::Nexus => Self::Nexus, + DiscretionaryOmicronZone::BoundaryNtp => Self::BoundaryNtp, DiscretionaryOmicronZone::CockroachDb => Self::CockroachDb, + DiscretionaryOmicronZone::Nexus => Self::Nexus, } } } @@ -68,6 +70,15 @@ pub(super) struct OmicronZonePlacementSledState { pub discretionary_zones: Vec, } +impl OmicronZonePlacementSledState { + fn num_discretionary_zones_of_kind( + &self, + kind: DiscretionaryOmicronZone, + ) -> usize { + self.discretionary_zones.iter().filter(|&&z| z == kind).count() + } +} + /// `OmicronZonePlacement` keeps an internal heap of sleds and their current /// discretionary zones and chooses sleds for placement of additional /// discretionary zones. 
@@ -154,21 +165,24 @@ impl OmicronZonePlacement { let mut sleds_skipped = Vec::new(); let mut chosen_sled = None; while let Some(sled) = self.sleds.pop() { - // Ensure we have at least one zpool more than the number of - // `zone_kind` zones already placed on this sled. If we don't, we - // already have a zone of this kind on each zpool, so we'll skip - // this sled. - if sled - .discretionary_zones - .iter() - .filter(|&&z| z == zone_kind) - .count() - < sled.num_zpools - { + let num_existing = sled.num_discretionary_zones_of_kind(zone_kind); + + // For boundary NTP, a sled is only eligible if it does not already + // hold a boundary NTP zone. + let should_skip = zone_kind + == DiscretionaryOmicronZone::BoundaryNtp + && num_existing > 0; + + // For all zone kinds, a sled is only eligible if it has at + // least one zpool more than the number of `zone_kind` zones + // already placed on this sled. + let should_skip = should_skip || num_existing >= sled.num_zpools; + + if should_skip { + sleds_skipped.push(sled); + } else { chosen_sled = Some(sled); break; - } else { - sleds_skipped.push(sled); } } @@ -374,14 +388,22 @@ pub mod test { ) -> Result<(), String> { let sled_state = self.sleds.get(&sled_id).expect("valid sled_id"); let existing_zones = sled_state.count_zones_of_kind(kind); - if existing_zones < sled_state.num_zpools { - Ok(()) - } else { + + // Boundary NTP is special: there should be at most one instance per + // sled, so placing a new boundary NTP zone is only legal if the + // sled doesn't already have one. + if kind == DiscretionaryOmicronZone::BoundaryNtp + && existing_zones > 0 + { + Err(format!("sled {sled_id} already has a boundary NTP zone")) + } else if existing_zones >= sled_state.num_zpools { Err(format!( "already have {existing_zones} \ {kind:?} instances but only {} zpools", sled_state.num_zpools )) + } else { + Ok(()) } } @@ -446,10 +468,20 @@ pub mod test { &self, kind: DiscretionaryOmicronZone, ) -> Result<(), String> { - // Zones should be placeable unless every sled already has a zone of - // this kind on every disk. + let max_this_kind_for_sled = |sled_state: &TestSledState| { + // Boundary NTP zones should be placeable unless every sled + // already has one. Other zone types should be placeable unless + // every sled already has a zone of that kind on every disk. 
+ if kind == DiscretionaryOmicronZone::BoundaryNtp { + usize::min(1, sled_state.num_zpools) + } else { + sled_state.num_zpools + } + }; + for (sled_id, sled_state) in self.sleds.iter() { - if sled_state.count_zones_of_kind(kind) < sled_state.num_zpools + if sled_state.count_zones_of_kind(kind) + < max_this_kind_for_sled(sled_state) { return Err(format!( "sled {sled_id} is eligible for {kind:?} placement" diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 534d92bf08..f6989be9ef 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -79,6 +79,7 @@ pub struct SystemDescription { sled_subnets: Box, available_non_scrimlet_slots: BTreeSet, available_scrimlet_slots: BTreeSet, + target_boundary_ntp_zone_count: usize, target_nexus_zone_count: usize, target_cockroachdb_zone_count: usize, target_cockroachdb_cluster_version: CockroachDbClusterVersion, @@ -130,9 +131,11 @@ impl SystemDescription { // Policy defaults let target_nexus_zone_count = NEXUS_REDUNDANCY; - // TODO-cleanup This is wrong, but we don't currently set up any CRDB - // nodes in our fake system, so this prevents downstream test issues - // with the planner thinking our system is out of date from the gate. + // TODO-cleanup These are wrong, but we don't currently set up any + // boundary NTP or CRDB nodes in our fake system, so this prevents + // downstream test issues with the planner thinking our system is out of + // date from the gate. + let target_boundary_ntp_zone_count = 0; let target_cockroachdb_zone_count = 0; let target_cockroachdb_cluster_version = @@ -151,6 +154,7 @@ impl SystemDescription { sled_subnets, available_non_scrimlet_slots, available_scrimlet_slots, + target_boundary_ntp_zone_count, target_nexus_zone_count, target_cockroachdb_zone_count, target_cockroachdb_cluster_version, @@ -319,6 +323,7 @@ impl SystemDescription { ) -> anyhow::Result { let policy = Policy { service_ip_pool_ranges: self.service_ip_pool_ranges.clone(), + target_boundary_ntp_zone_count: self.target_boundary_ntp_zone_count, target_nexus_zone_count: self.target_nexus_zone_count, target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 68971ec3e1..e0ba0f10ba 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,6 +33,7 @@ use nexus_types::identity::Resource; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; +use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; use omicron_common::address::COCKROACHDB_REDUNDANCY; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; @@ -60,6 +61,7 @@ pub struct PlanningInputFromDb<'a> { pub ip_pool_range_rows: &'a [nexus_db_model::IpPoolRange], pub external_ip_rows: &'a [nexus_db_model::ExternalIp], pub service_nic_rows: &'a [nexus_db_model::ServiceNetworkInterface], + pub target_boundary_ntp_zone_count: usize, pub target_nexus_zone_count: usize, pub target_cockroachdb_zone_count: usize, pub target_cockroachdb_cluster_version: CockroachDbClusterVersion, @@ -75,6 +77,7 @@ impl PlanningInputFromDb<'_> { self.ip_pool_range_rows.iter().map(IpRange::from).collect(); let policy = Policy { service_ip_pool_ranges, + target_boundary_ntp_zone_count: self.target_boundary_ntp_zone_count, 
target_nexus_zone_count: self.target_nexus_zone_count, target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self @@ -236,6 +239,7 @@ pub async fn reconfigurator_state_load( sled_rows: &sled_rows, zpool_rows: &zpool_rows, ip_pool_range_rows: &ip_pool_range_rows, + target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, target_nexus_zone_count: NEXUS_REDUNDANCY, target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index ca4635b13e..e9095cc991 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -17,6 +17,7 @@ use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; +use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; use omicron_common::address::COCKROACHDB_REDUNDANCY; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; @@ -175,6 +176,7 @@ impl super::Nexus { ip_pool_range_rows: &ip_pool_range_rows, external_ip_rows: &external_ip_rows, service_nic_rows: &service_nic_rows, + target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, target_nexus_zone_count: NEXUS_REDUNDANCY, target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, target_cockroachdb_cluster_version: diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index a5feff067a..1af3636d0e 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -87,6 +87,10 @@ impl PlanningInput { &self.cockroachdb_settings } + pub fn target_boundary_ntp_zone_count(&self) -> usize { + self.policy.target_boundary_ntp_zone_count + } + pub fn target_nexus_zone_count(&self) -> usize { self.policy.target_nexus_zone_count } @@ -692,6 +696,9 @@ pub struct Policy { /// services (e.g., external DNS, Nexus, boundary NTP) pub service_ip_pool_ranges: Vec, + /// desired total number of deployed Boundary NTP zones + pub target_boundary_ntp_zone_count: usize, + /// desired total number of deployed Nexus zones pub target_nexus_zone_count: usize, @@ -749,6 +756,7 @@ impl PlanningInputBuilder { PlanningInput { policy: Policy { service_ip_pool_ranges: Vec::new(), + target_boundary_ntp_zone_count: 0, target_nexus_zone_count: 0, target_cockroachdb_zone_count: 0, target_cockroachdb_cluster_version: diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 0062e02dd3..471717989a 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -15,9 +15,10 @@ use nexus_sled_agent_shared::inventory::{ }; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, - COCKROACHDB_REDUNDANCY, DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, - DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, MGD_PORT, MGS_PORT, NEXUS_REDUNDANCY, - NTP_PORT, NUM_SOURCE_NAT_PORTS, RSS_RESERVED_ADDRESSES, SLED_PREFIX, + BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DENDRITE_PORT, + DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, MGD_PORT, + MGS_PORT, NEXUS_REDUNDANCY, NTP_PORT, NUM_SOURCE_NAT_PORTS, + RSS_RESERVED_ADDRESSES, SLED_PREFIX, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; use omicron_common::api::internal::shared::{ @@ -48,9 +49,6 @@ use std::num::Wrapping; use 
thiserror::Error; use uuid::Uuid; -// The number of boundary NTP servers to create from RSS. -const BOUNDARY_NTP_COUNT: usize = 2; - // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Oximeter. const OXIMETER_COUNT: usize = 1; @@ -734,7 +732,7 @@ impl Plan { let ntp_address = SocketAddrV6::new(address, NTP_PORT, 0, 0); let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); - let (zone_type, svcname) = if idx < BOUNDARY_NTP_COUNT { + let (zone_type, svcname) = if idx < BOUNDARY_NTP_REDUNDANCY { boundary_ntp_servers .push(Host::for_zone(Zone::Other(id)).fqdn()); let (nic, snat_cfg) = svc_port_builder.next_snat(id)?;
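
Editor's note: the placement rule this patch threads through `OmicronZonePlacement` is easy to state but worth seeing in isolation: boundary NTP is capped at one zone per sled (each one is promoted from that sled's single internal NTP zone), while other discretionary kinds are capped at one zone of that kind per zpool. Below is a minimal, self-contained Rust sketch of that eligibility check. The names (`Kind`, `Sled`, `eligible_for`) are illustrative stand-ins, not the real API, which operates on a heap of `OmicronZonePlacementSledState` as shown in the diff above.

    // Discretionary zone kinds considered by the sketch (mirrors the
    // variants of `DiscretionaryOmicronZone` in this patch).
    #[derive(Clone, Copy, PartialEq, Eq)]
    enum Kind {
        BoundaryNtp,
        CockroachDb,
        Nexus,
    }

    // Illustrative stand-in for `OmicronZonePlacementSledState`.
    struct Sled {
        num_zpools: usize,
        zones: Vec<Kind>,
    }

    impl Sled {
        fn num_of_kind(&self, kind: Kind) -> usize {
            self.zones.iter().filter(|&&z| z == kind).count()
        }

        // Mirrors the `should_skip` logic in `place_zone`, inverted:
        // `true` means this sled may receive another zone of `kind`.
        fn eligible_for(&self, kind: Kind) -> bool {
            let existing = self.num_of_kind(kind);
            // Boundary NTP: at most one zone per sled, since it is
            // promoted from the sled's single internal NTP zone.
            if kind == Kind::BoundaryNtp && existing > 0 {
                return false;
            }
            // All kinds: at most one zone of a given kind per zpool.
            existing < self.num_zpools
        }
    }

    fn main() {
        let sled = Sled { num_zpools: 2, zones: vec![Kind::BoundaryNtp] };
        assert!(!sled.eligible_for(Kind::BoundaryNtp)); // already promoted
        assert!(sled.eligible_for(Kind::Nexus)); // 0 Nexus zones < 2 zpools
    }

For manual experimentation, the new `BlueprintEditCommands::ExpungeZone` variant added above should be reachable as something like `blueprint-edit <blueprint-id> expunge-zone <sled-id> <zone-id>`; the exact invocation depends on reconfigurator-cli's clap configuration.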