From 6a31e658af66dd2a549340b4eeac907a66534752 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 30 Jan 2024 17:30:32 -0500 Subject: [PATCH 01/12] blueprint planner: add nexus on every sled --- Cargo.lock | 1 + clients/sled-agent-client/Cargo.toml | 1 + clients/sled-agent-client/src/lib.rs | 72 ++- common/src/address.rs | 12 + nexus/deployment/src/blueprint_builder.rs | 607 ++++++++++++++++++++-- nexus/deployment/src/planner.rs | 61 ++- nexus/src/app/deployment.rs | 38 +- nexus/types/src/deployment.rs | 9 + nexus/types/src/inventory.rs | 2 +- 9 files changed, 760 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b41abce1f4..4cea003eb1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8059,6 +8059,7 @@ dependencies = [ name = "sled-agent-client" version = "0.1.0" dependencies = [ + "anyhow", "async-trait", "chrono", "ipnetwork", diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml index 8630030b24..71b94441ed 100644 --- a/clients/sled-agent-client/Cargo.toml +++ b/clients/sled-agent-client/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] +anyhow.workspace = true async-trait.workspace = true chrono.workspace = true omicron-common.workspace = true diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 39de64ec62..eb1e57b11f 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -4,8 +4,11 @@ //! Interface for making API requests to a Sled Agent +use anyhow::Context; use async_trait::async_trait; use std::convert::TryFrom; +use std::net::IpAddr; +use std::net::SocketAddr; use uuid::Uuid; progenitor::generate_api!( @@ -86,6 +89,74 @@ impl types::OmicronZoneType { | types::OmicronZoneType::Oximeter { .. } => false, } } + + /// Identifies whether this is a Nexus zone + pub fn is_nexus(&self) -> bool { + match self { + types::OmicronZoneType::Nexus { .. } => true, + + types::OmicronZoneType::BoundaryNtp { .. } + | types::OmicronZoneType::InternalNtp { .. } + | types::OmicronZoneType::Clickhouse { .. } + | types::OmicronZoneType::ClickhouseKeeper { .. } + | types::OmicronZoneType::CockroachDb { .. } + | types::OmicronZoneType::Crucible { .. } + | types::OmicronZoneType::CruciblePantry { .. } + | types::OmicronZoneType::ExternalDns { .. } + | types::OmicronZoneType::InternalDns { .. } + | types::OmicronZoneType::Oximeter { .. } => false, + } + } + + /// This zone's external IP + pub fn external_ip(&self) -> anyhow::Result> { + match self { + types::OmicronZoneType::Nexus { external_ip, .. } => { + Ok(Some(*external_ip)) + } + + types::OmicronZoneType::ExternalDns { dns_address, .. } => { + let dns_address = + dns_address.parse::().with_context(|| { + format!( + "failed to parse ExternalDns address {dns_address}" + ) + })?; + Ok(Some(dns_address.ip())) + } + + types::OmicronZoneType::BoundaryNtp { snat_cfg, .. } => { + Ok(Some(snat_cfg.ip)) + } + + types::OmicronZoneType::InternalNtp { .. } + | types::OmicronZoneType::Clickhouse { .. } + | types::OmicronZoneType::ClickhouseKeeper { .. } + | types::OmicronZoneType::CockroachDb { .. } + | types::OmicronZoneType::Crucible { .. } + | types::OmicronZoneType::CruciblePantry { .. } + | types::OmicronZoneType::InternalDns { .. } + | types::OmicronZoneType::Oximeter { .. } => Ok(None), + } + } + + /// The service vNIC providing external connectivity to this zone + pub fn service_vnic(&self) -> Option<&types::NetworkInterface> { + match self { + types::OmicronZoneType::Nexus { nic, .. 
} + | types::OmicronZoneType::ExternalDns { nic, .. } + | types::OmicronZoneType::BoundaryNtp { nic, .. } => Some(nic), + + types::OmicronZoneType::InternalNtp { .. } + | types::OmicronZoneType::Clickhouse { .. } + | types::OmicronZoneType::ClickhouseKeeper { .. } + | types::OmicronZoneType::CockroachDb { .. } + | types::OmicronZoneType::Crucible { .. } + | types::OmicronZoneType::CruciblePantry { .. } + | types::OmicronZoneType::InternalDns { .. } + | types::OmicronZoneType::Oximeter { .. } => None, + } + } } impl omicron_common::api::external::ClientError for types::Error { @@ -351,7 +422,6 @@ impl From for types::Ipv6Net { impl From for types::IpNet { fn from(s: std::net::IpAddr) -> Self { - use std::net::IpAddr; match s { IpAddr::V4(v4) => Self::V4(v4.into()), IpAddr::V6(v6) => Self::V6(v6.into()), diff --git a/common/src/address.rs b/common/src/address.rs index 65a6604daf..c26c6ff911 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -457,6 +457,18 @@ impl TryFrom<(Ipv6Addr, Ipv6Addr)> for IpRange { } } +impl From for IpRange { + fn from(value: Ipv4Range) -> Self { + Self::V4(value) + } +} + +impl From for IpRange { + fn from(value: Ipv6Range) -> Self { + Self::V6(value) + } +} + /// A non-decreasing IPv4 address range, inclusive of both ends. /// /// The first address must be less than or equal to the last address. diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index ac2fe70e6b..2d1e42a544 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -6,11 +6,14 @@ use crate::ip_allocator::IpAllocator; use anyhow::anyhow; +use anyhow::bail; use internal_dns::config::Host; use internal_dns::config::ZoneVariant; use ipnet::IpAdd; use nexus_inventory::now_db_precision; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::NetworkInterface; +use nexus_types::deployment::NetworkInterfaceKind; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; use nexus_types::deployment::OmicronZoneType; @@ -23,11 +26,20 @@ use omicron_common::address::get_internal_dns_server_addresses; use omicron_common::address::get_sled_address; use omicron_common::address::get_switch_zone_address; use omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES; +use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; +use omicron_common::address::NEXUS_OPTE_IPV6_SUBNET; use omicron_common::address::NTP_PORT; use omicron_common::address::SLED_RESERVED_ADDRESSES; use omicron_common::api::external::Generation; +use omicron_common::api::external::IpNet; +use omicron_common::api::external::MacAddr; +use omicron_common::api::external::Vni; +use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use std::collections::BTreeMap; use std::collections::BTreeSet; +use std::collections::HashSet; +use std::net::IpAddr; +use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use thiserror::Error; @@ -38,6 +50,12 @@ use uuid::Uuid; pub enum Error { #[error("sled {sled_id}: ran out of available addresses for sled")] OutOfAddresses { sled_id: Uuid }, + #[error("no Nexus zones exist in parent blueprint")] + NoNexusZonesInParentBlueprint, + #[error("no external service IP addresses are available")] + NoExternalServiceIpAvailable, + #[error("exhausted available Nexus IP addresses")] + ExhaustedNexusIps, #[error("programming error in planner")] Planner(#[from] anyhow::Error), } @@ -81,6 +99,16 @@ pub struct BlueprintBuilder<'a> { 
zones_in_service: BTreeSet, creator: String, comments: Vec, + + // These fields emulate how RSS choses addresses for zone NICs. + nexus_v4_ips: Box + Send>, + nexus_v6_ips: Box + Send>, + + // Iterator of available external IPs for service zones + available_external_ips: Box + Send + 'a>, + + // All MACs used by NICs in zones of the blueprint we're building. + used_macs: HashSet, } impl<'a> BlueprintBuilder<'a> { @@ -146,8 +174,102 @@ impl<'a> BlueprintBuilder<'a> { parent_blueprint: &'a Blueprint, policy: &'a Policy, creator: &str, - ) -> BlueprintBuilder<'a> { - BlueprintBuilder { + ) -> anyhow::Result> { + // Scan through the parent blueprint and build several sets of "used + // resources". When adding new control plane zones to a sled, we may + // need to allocate new resources to that zone. However, allocation at + // this point is entirely optimistic and theoretical: our caller may + // discard the blueprint we create without ever making it the new + // target, or it might be an arbitrarily long time before it becomes the + // target. We need to be able to make allocation decisions that we + // expect the blueprint executor to be able to realize successfully if + // and when we become the target, but we cannot _actually_ perform + // resource allocation. + // + // To do this, we look at our parent blueprint's used resources, and + // then choose new resources that aren't already in use (if possible; if + // we need to allocate a new resource and the parent blueprint appears + // to be using all the resources of that kind, our blueprint generation + // will fail). + // + // For example, RSS assigns Nexus NIC IPs by stepping through a list of + // addresses based on `NEXUS_OPTE_IPVx_SUBNET` (as in the iterators + // below). We use the same list of addresses, but additionally need to + // filter out the existing IPs for any Nexus instances that already + // exist. + // + // Note that by building these iterators up front based on + // `parent_blueprint`, we cannot reuse resources in a case where we + // remove a zone that used a resource and then add another zone that + // wants the same kind of resource. We don't support zone removal yet, + // but expect this to be okay: we don't anticipate removal and addition + // to frequently be combined into the exact same blueprint, particularly + // in a way that expects the addition to reuse resources from the + // removal; we will won't want to attempt to reuse resources from a zone + // until we know it's been fully removed. + let mut existing_nexus_v4_ips: HashSet = HashSet::new(); + let mut existing_nexus_v6_ips: HashSet = HashSet::new(); + let mut used_external_ips: HashSet = HashSet::new(); + let mut used_macs: HashSet = HashSet::new(); + + for zone_config in parent_blueprint.omicron_zones.values() { + for z in &zone_config.zones { + if let OmicronZoneType::Nexus { nic, .. } = &z.zone_type { + match nic.ip { + IpAddr::V4(ip) => { + if !existing_nexus_v4_ips.insert(ip) { + bail!("duplicate Nexus NIC IP: {ip}"); + } + } + IpAddr::V6(ip) => { + if !existing_nexus_v6_ips.insert(ip) { + bail!("duplicate Nexus NIC IP: {ip}"); + } + } + } + } + if let Some(external_ip) = z.zone_type.external_ip()? 
{ + if !used_external_ips.insert(external_ip) { + bail!("duplicate external IP: {external_ip}"); + } + } + if let Some(nic) = z.zone_type.service_vnic() { + if !used_macs.insert(nic.mac) { + bail!("duplicate service vNIC MAC: {}", nic.mac); + } + } + } + } + + // TODO-performance Building these iterators as "walk through the list + // and skip anything we've used already" is fine as long as we're + // talking about a small number of resources (e.g., single-digit number + // of Nexus instances), but wouldn't be ideal if we have many resources + // we need to skip. We could do something smarter here based on the sets + // of used resources we built above if needed. + let nexus_v4_ips = Box::new( + NEXUS_OPTE_IPV4_SUBNET + .0 + .iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .filter(move |ip| !existing_nexus_v4_ips.contains(ip)), + ); + let nexus_v6_ips = Box::new( + NEXUS_OPTE_IPV6_SUBNET + .0 + .iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .filter(move |ip| !existing_nexus_v6_ips.contains(ip)), + ); + let available_external_ips = Box::new( + policy + .service_ip_pool_ranges + .iter() + .flat_map(|r| r.iter()) + .filter(move |ip| !used_external_ips.contains(ip)), + ); + + Ok(BlueprintBuilder { parent_blueprint, policy, sled_ip_allocators: BTreeMap::new(), @@ -155,7 +277,11 @@ impl<'a> BlueprintBuilder<'a> { zones_in_service: parent_blueprint.zones_in_service.clone(), creator: creator.to_owned(), comments: Vec::new(), - } + nexus_v4_ips, + nexus_v6_ips, + available_external_ips, + used_macs, + }) } /// Assemble a final [`Blueprint`] based on the contents of the builder @@ -329,6 +455,118 @@ impl<'a> BlueprintBuilder<'a> { Ok(Ensure::Added) } + pub fn sled_ensure_zone_nexus( + &mut self, + sled_id: Uuid, + ) -> Result { + // If there's already a Nexus zone on this sled, do nothing. + let has_nexus = self + .parent_blueprint + .omicron_zones + .get(&sled_id) + .map(|found_zones| { + found_zones.zones.iter().any(|z| z.zone_type.is_nexus()) + }) + .unwrap_or(false); + if has_nexus { + return Ok(Ensure::NotNeeded); + } + + // Whether Nexus should use TLS and what the external DNS servers it + // should use are currently provided at rack-setup time, and should be + // consistent across all Nexus instances. We'll assume we can copy them + // from any other Nexus zone in our parent blueprint. + // + // TODO-correctness Once these properties can be changed by a rack + // operator, this will need more work. At a minimum, if such a change + // goes through the blueprint system (which seems likely), we'll need to + // check that we're if this builder is being used to make such a change, + // that change is also reflected here in a new zone. Perhaps these + // settings should be part of `Policy` instead? + let (external_tls, external_dns_servers) = self + .parent_blueprint + .omicron_zones + .values() + .find_map(|sled_zones| { + sled_zones.zones.iter().find_map(|z| match &z.zone_type { + OmicronZoneType::Nexus { + external_tls, + external_dns_servers, + .. + } => Some((*external_tls, external_dns_servers.clone())), + _ => None, + }) + }) + .ok_or(Error::NoNexusZonesInParentBlueprint)?; + + let nexus_id = Uuid::new_v4(); + let external_ip = self + .available_external_ips + .next() + .ok_or(Error::NoExternalServiceIpAvailable)?; + + let nic = { + let (ip, subnet) = match external_ip { + IpAddr::V4(_) => ( + self.nexus_v4_ips + .next() + .ok_or(Error::ExhaustedNexusIps)? 
+ .into(), + IpNet::from(*NEXUS_OPTE_IPV4_SUBNET).into(), + ), + IpAddr::V6(_) => ( + self.nexus_v6_ips + .next() + .ok_or(Error::ExhaustedNexusIps)? + .into(), + IpNet::from(*NEXUS_OPTE_IPV6_SUBNET).into(), + ), + }; + NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(nexus_id), + name: format!("nexus-{nexus_id}").parse().unwrap(), + ip, + mac: self.random_mac(), + subnet, + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + } + }; + + let ip = self.sled_alloc_ip(sled_id)?; + let port = omicron_common::address::NEXUS_INTERNAL_PORT; + let internal_address = SocketAddrV6::new(ip, port, 0, 0).to_string(); + let zone = OmicronZoneConfig { + id: nexus_id, + underlay_address: ip, + zone_type: OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + }, + }; + self.sled_add_zone(sled_id, zone)?; + Ok(Ensure::Added) + } + + fn random_mac(&mut self) -> MacAddr { + let mut mac = MacAddr::random_system(); + + // TODO-performance if the size of `used_macs` starts to approach some + // nontrivial fraction of the total random space, we could do something + // smarter here (e.g., start with a random mac then increment until we + // find an unused value). + while !self.used_macs.insert(mac) { + mac = MacAddr::random_system(); + } + + mac + } + fn sled_add_zone( &mut self, sled_id: Uuid, @@ -422,26 +660,17 @@ impl<'a> BlueprintBuilder<'a> { #[cfg(test)] pub mod test { - use super::BlueprintBuilder; - use ipnet::IpAdd; - use nexus_types::deployment::Policy; - use nexus_types::deployment::SledResources; - use nexus_types::deployment::ZpoolName; - use nexus_types::inventory::Collection; + use super::*; + use omicron_common::address::IpRange; + use omicron_common::address::Ipv4Range; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::ByteCount; - use omicron_common::api::external::Generation; use sled_agent_client::types::{ Baseboard, Inventory, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, OmicronZonesConfig, SledRole, }; - use std::collections::BTreeMap; - use std::collections::BTreeSet; - use std::net::Ipv6Addr; - use std::net::SocketAddrV6; use std::str::FromStr; - use uuid::Uuid; /// Returns a collection and policy describing a pretty simple system pub fn example() -> (Collection, Policy) { @@ -452,7 +681,31 @@ pub mod test { "a5f3db3a-61aa-4f90-ad3e-02833c253bf5", "0d168386-2551-44e8-98dd-ae7a7570f8a0", ]; - let mut policy = Policy { sleds: BTreeMap::new() }; + let mut policy = Policy { + sleds: BTreeMap::new(), + // IPs from TEST-NET-1 (RFC 5737) + service_ip_pool_ranges: vec![Ipv4Range::new( + "192.0.2.2".parse().unwrap(), + "192.0.2.20".parse().unwrap(), + ) + .unwrap() + .into()], + }; + let mut service_ip_pool_range = policy.service_ip_pool_ranges[0].iter(); + let mut nexus_nic_ips = NEXUS_OPTE_IPV4_SUBNET + .iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES); + let mut nexus_nic_macs = { + let mut used = HashSet::new(); + std::iter::from_fn(move || { + let mut mac = MacAddr::random_system(); + while !used.insert(mac) { + mac = MacAddr::random_system(); + } + Some(mac) + }) + }; + for sled_id_str in sled_ids.iter() { let sled_id: Uuid = sled_id_str.parse().unwrap(); let sled_ip = policy_add_sled(&mut policy, sled_id); @@ -480,17 +733,55 @@ pub mod test { .unwrap(); let zpools = &policy.sleds.get(&sled_id).unwrap().zpools; - let ip1 = sled_ip.saturating_add(1); - let zones: Vec<_> = std::iter::once(OmicronZoneConfig { - id: Uuid::new_v4(), - 
underlay_address: sled_ip.saturating_add(1), - zone_type: OmicronZoneType::InternalNtp { - address: SocketAddrV6::new(ip1, 12345, 0, 0).to_string(), - dns_servers: vec![], - domain: None, - ntp_servers: vec![], - }, + let mut sled_ips = + std::iter::successors(Some(sled_ip.saturating_add(1)), |ip| { + Some(ip.saturating_add(1)) + }); + let zones: Vec<_> = std::iter::once({ + let ip = sled_ips.next().unwrap(); + OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: ip, + zone_type: OmicronZoneType::InternalNtp { + address: SocketAddrV6::new(ip, 12345, 0, 0).to_string(), + dns_servers: vec![], + domain: None, + ntp_servers: vec![], + }, + } }) + .chain(std::iter::once({ + let id = Uuid::new_v4(); + let ip = sled_ips.next().unwrap(); + let external_ip = + service_ip_pool_range.next().expect("no service IPs left"); + let nic_ip = + nexus_nic_ips.next().expect("no nexus nic IPs left"); + OmicronZoneConfig { + id, + underlay_address: ip, + zone_type: OmicronZoneType::Nexus { + internal_address: SocketAddrV6::new(ip, 12346, 0, 0) + .to_string(), + external_ip, + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(id), + name: format!("nexus-{id}").parse().unwrap(), + ip: nic_ip.into(), + mac: nexus_nic_macs + .next() + .expect("no nexus nic MACs left"), + subnet: IpNet::from(*NEXUS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + external_tls: false, + external_dns_servers: Vec::new(), + }, + } + })) .chain(zpools.iter().enumerate().map(|(i, zpool_name)| { let ip = sled_ip.saturating_add(u128::try_from(i + 2).unwrap()); OmicronZoneConfig { @@ -575,7 +866,8 @@ pub mod test { &blueprint_initial, &policy, "test_basic", - ); + ) + .expect("failed to create builder"); let blueprint = builder.build(); let diff = blueprint_initial.diff(&blueprint); println!( @@ -598,7 +890,8 @@ pub mod test { .expect("failed to create initial blueprint"); let mut builder = - BlueprintBuilder::new_based_on(&blueprint1, &policy, "test_basic"); + BlueprintBuilder::new_based_on(&blueprint1, &policy, "test_basic") + .expect("failed to create builder"); // The initial blueprint should have internal NTP zones on all the // existing sleds, plus Crucible zones on all pools. So if we ensure @@ -626,7 +919,8 @@ pub mod test { let new_sled_id = Uuid::new_v4(); let _ = policy_add_sled(&mut policy, new_sled_id); let mut builder = - BlueprintBuilder::new_based_on(&blueprint2, &policy, "test_basic"); + BlueprintBuilder::new_based_on(&blueprint2, &policy, "test_basic") + .expect("failed to create builder"); builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); let new_sled_resources = policy.sleds.get(&new_sled_id).unwrap(); for pool_name in &new_sled_resources.zpools { @@ -691,4 +985,259 @@ pub mod test { .collect::>(); assert_eq!(crucible_pool_names, new_sled_resources.zpools); } + + #[test] + fn test_add_nexus_with_no_existing_nexus_zones() { + let (mut collection, policy) = example(); + + // Adding a new Nexus zone currently requires copying settings from an + // existing Nexus zone. If we remove all Nexus zones from the + // collection, create a blueprint, then try to add a Nexus zone, it + // should fail. + for zones in collection.omicron_zones.values_mut() { + zones.zones.zones.retain(|z| { + !matches!(z.zone_type, OmicronZoneType::Nexus { .. 
}) + }); + } + + let parent = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "test", + ) + .expect("failed to create initial blueprint"); + + let mut builder = + BlueprintBuilder::new_based_on(&parent, &policy, "test") + .expect("failed to create builder"); + + let err = builder + .sled_ensure_zone_nexus( + collection + .omicron_zones + .keys() + .next() + .copied() + .expect("no sleds present"), + ) + .unwrap_err(); + + assert!( + matches!(err, Error::NoNexusZonesInParentBlueprint), + "unexpected error {err}" + ); + } + + #[test] + fn test_add_nexus_error_cases() { + let (mut collection, policy) = example(); + + // Remove the Nexus zone from one of the sleds so that + // `sled_ensure_zone_nexus` can attempt to add a Nexus zone to + // `sled_id`. + let sled_id = { + let mut selected_sled_id = None; + for (sled_id, zones) in &mut collection.omicron_zones { + let nzones_before_retain = zones.zones.zones.len(); + zones.zones.zones.retain(|z| { + !matches!(z.zone_type, OmicronZoneType::Nexus { .. }) + }); + if zones.zones.zones.len() < nzones_before_retain { + selected_sled_id = Some(*sled_id); + break; + } + } + selected_sled_id.expect("found no sleds with Nexus zone") + }; + + let parent = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "test", + ) + .expect("failed to create initial blueprint"); + + { + // Attempting to add Nexus to the zone we removed it from (with no + // other changes to the environment) should succeed. + let mut builder = + BlueprintBuilder::new_based_on(&parent, &policy, "test") + .expect("failed to create builder"); + let added = builder + .sled_ensure_zone_nexus(sled_id) + .expect("failed to ensure nexus zone"); + + assert_eq!(added, Ensure::Added); + } + + { + // Replace the policy's external service IP pool ranges with ranges + // that are already in use by existing zones. Attempting to add a + // Nexus with no remaining external IPs should fail. + let mut policy = policy.clone(); + let mut used_ip_ranges = Vec::new(); + for zones in parent.omicron_zones.values() { + for z in &zones.zones { + if let Some(ip) = z + .zone_type + .external_ip() + .expect("failed to check for external IP") + { + used_ip_ranges.push(IpRange::from(ip)); + } + } + } + assert!(!used_ip_ranges.is_empty()); + policy.service_ip_pool_ranges = used_ip_ranges; + + let mut builder = + BlueprintBuilder::new_based_on(&parent, &policy, "test") + .expect("failed to create builder"); + let err = builder.sled_ensure_zone_nexus(sled_id).unwrap_err(); + + assert!( + matches!(err, Error::NoExternalServiceIpAvailable), + "unexpected error {err}" + ); + } + + // We're not testing the `ExhaustedNexusIps` error case (where we've run + // out of Nexus OPTE addresses), because it's fairly diffiult to induce + // that from outside: we would need to start from a parent blueprint + // that contained a Nexus instance for every IP in the + // `NEXUS_OPTE_*_SUBNET`. We could hack around that by creating the + // `BlueprintBuilder` and mucking with its internals, but that doesn't + // seem like a particularly useful test either. + } + + #[test] + fn test_invalid_parent_blueprint_two_zones_with_same_external_ip() { + let (mut collection, policy) = example(); + + // We should fail if the parent blueprint claims to contain two + // zones with the same external IP. Skim through the zones, copy the + // external IP from one Nexus zone, then assign it to a later Nexus + // zone. 
+ let mut found_second_nexus_zone = false; + let mut nexus_external_ip = None; + + 'outer: for zones in collection.omicron_zones.values_mut() { + for z in zones.zones.zones.iter_mut() { + if let OmicronZoneType::Nexus { external_ip, .. } = + &mut z.zone_type + { + if let Some(ip) = nexus_external_ip { + *external_ip = ip; + found_second_nexus_zone = true; + break 'outer; + } else { + nexus_external_ip = Some(*external_ip); + continue 'outer; + } + } + } + } + assert!(found_second_nexus_zone, "only one Nexus zone present?"); + + let parent = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "test", + ) + .unwrap(); + + match BlueprintBuilder::new_based_on(&parent, &policy, "test") { + Ok(_) => panic!("unexpected success"), + Err(err) => assert!( + err.to_string().contains("duplicate external IP"), + "unexpected error: {err:#}" + ), + }; + } + + #[test] + fn test_invalid_parent_blueprint_two_nexus_zones_with_same_nic_ip() { + let (mut collection, policy) = example(); + + // We should fail if the parent blueprint claims to contain two + // Nexus zones with the same NIC IP. Skim through the zones, copy + // the NIC IP from one Nexus zone, then assign it to a later + // Nexus zone. + let mut found_second_nexus_zone = false; + let mut nexus_nic_ip = None; + + 'outer: for zones in collection.omicron_zones.values_mut() { + for z in zones.zones.zones.iter_mut() { + if let OmicronZoneType::Nexus { nic, .. } = &mut z.zone_type { + if let Some(ip) = nexus_nic_ip { + nic.ip = ip; + found_second_nexus_zone = true; + break 'outer; + } else { + nexus_nic_ip = Some(nic.ip); + continue 'outer; + } + } + } + } + assert!(found_second_nexus_zone, "only one Nexus zone present?"); + + let parent = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "test", + ) + .unwrap(); + + match BlueprintBuilder::new_based_on(&parent, &policy, "test") { + Ok(_) => panic!("unexpected success"), + Err(err) => assert!( + err.to_string().contains("duplicate Nexus NIC IP"), + "unexpected error: {err:#}" + ), + }; + } + + #[test] + fn test_invalid_parent_blueprint_two_zones_with_same_vnic_mac() { + let (mut collection, policy) = example(); + + // We should fail if the parent blueprint claims to contain two + // zones with the same service vNIC MAC address. Skim through the + // zones, copy the NIC MAC from one Nexus zone, then assign it to a + // later Nexus zone. + let mut found_second_nexus_zone = false; + let mut nexus_nic_mac = None; + + 'outer: for zones in collection.omicron_zones.values_mut() { + for z in zones.zones.zones.iter_mut() { + if let OmicronZoneType::Nexus { nic, .. 
} = &mut z.zone_type { + if let Some(mac) = nexus_nic_mac { + nic.mac = mac; + found_second_nexus_zone = true; + break 'outer; + } else { + nexus_nic_mac = Some(nic.mac); + continue 'outer; + } + } + } + } + assert!(found_second_nexus_zone, "only one Nexus zone present?"); + + let parent = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "test", + ) + .unwrap(); + + match BlueprintBuilder::new_based_on(&parent, &policy, "test") { + Ok(_) => panic!("unexpected success"), + Err(err) => assert!( + err.to_string().contains("duplicate service vNIC MAC"), + "unexpected error: {err:#}" + ), + }; + } } diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index 0a8e1f0b81..5b6bd4d4d3 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -39,10 +39,10 @@ impl<'a> Planner<'a> { // NOTE: Right now, we just assume that this is the latest inventory // collection. See the comment on the corresponding field in `Planner`. inventory: &'a Collection, - ) -> Planner<'a> { + ) -> anyhow::Result> { let blueprint = - BlueprintBuilder::new_based_on(parent_blueprint, policy, creator); - Planner { log, policy, blueprint, inventory } + BlueprintBuilder::new_based_on(parent_blueprint, policy, creator)?; + Ok(Planner { log, policy, blueprint, inventory }) } pub fn plan(mut self) -> Result { @@ -133,7 +133,30 @@ impl<'a> Planner<'a> { } } - if ncrucibles_added > 0 { + // TODO XXX To test Nexus zone addition, attempt to ensure every + // sled has a Nexus zone. This is NOT a sensible policy, and should + // be removed before this lands on `main`. + // + // Adding a new Nexus currently requires the parent blueprint to + // have had at least one Nexus. By trying to ensure a Nexus on every + // sled, that implicitly requires every parent_blueprint we accept + // to have at least one Nexus in order to successfully call this + // method. That should be true in production and is currently true + // for all tests, but might be overly restrictive? We could change + // this to "if at least one Nexus exists, add a Nexus on every + // sled", but that seems like overkill for what should be a very + // short-lived policy. + // + // Ensuring a Nexus zone exists will also fail if there are no + // external IP addresses left. We should take that into account with + // whatever real logic we put here, too. + let nexus_added = + match self.blueprint.sled_ensure_zone_nexus(*sled_id)? { + Ensure::Added => true, + Ensure::NotNeeded => false, + }; + + if ncrucibles_added > 0 || nexus_added { // Don't make any other changes to this sled. However, this // change is compatible with any other changes to other sleds, // so we can "continue" here rather than "break". 
@@ -188,6 +211,7 @@ mod test { "no-op?", &collection, ) + .expect("failed to create planner") .plan() .expect("failed to plan"); @@ -210,6 +234,7 @@ mod test { "test: add NTP?", &collection, ) + .expect("failed to create planner") .plan() .expect("failed to plan"); @@ -274,11 +299,12 @@ mod test { "test: add Crucible zones?", &collection, ) + .expect("failed to create planner") .plan() .expect("failed to plan"); let diff = blueprint3.diff(&blueprint5); - println!("3 -> 5 (expect Crucible zones):\n{}", diff); + println!("3 -> 5 (expect 3 Crucible and 1 Nexus zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); let sleds = diff.sleds_changed().collect::>(); @@ -292,14 +318,28 @@ mod test { assert_eq!(sled_changes.zones_removed().count(), 0); assert_eq!(sled_changes.zones_changed().count(), 0); let zones = sled_changes.zones_added().collect::>(); - assert_eq!(zones.len(), 3); + let mut num_crucible_added = 0; + let mut num_nexus_added = 0; for zone in &zones { - let OmicronZoneType::Crucible { .. } = zone.zone_type else { - panic!("unexpectedly added a non-Crucible zone"); - }; + match zone.zone_type { + OmicronZoneType::Crucible { .. } => { + num_crucible_added += 1; + } + OmicronZoneType::Nexus { .. } => { + num_nexus_added += 1; + } + _ => panic!("unexpectedly added a non-Crucible zone: {zone:?}"), + } } + assert_eq!(num_crucible_added, 3); + assert_eq!(num_nexus_added, 1); - // Check that there are no more steps + // Check that there are no more steps. + // + // This also implicitly checks that the new Nexus zone added in + // blueprint4 did not reuse any resources from the existing Nexus zones + // (otherwise creation of the planner would fail due to an invalid + // parent blueprint). let blueprint6 = Planner::new_based_on( logctx.log.clone(), &blueprint5, @@ -307,6 +347,7 @@ mod test { "test: no-op?", &collection, ) + .expect("failed to create planner") .plan() .expect("failed to plan"); diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 65f8f4d028..c8eeeb0192 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -18,6 +18,7 @@ use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; use nexus_types::identity::Asset; use nexus_types::inventory::Collection; +use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::CreateResult; @@ -174,6 +175,30 @@ impl super::Nexus { }) .collect(); + let service_ip_pool_ranges = { + let (authz_service_ip_pool, _) = + datastore.ip_pools_service_lookup(opctx).await?; + + let mut ip_ranges = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = datastore + .ip_pool_list_ranges( + opctx, + &authz_service_ip_pool, + &p.current_pagparams(), + ) + .await?; + // The use of `last_address` here assumes `paginator` is sorting + // in Ascending order (which it does - see the implementation of + // `current_pagparams()`). + paginator = p.found_batch(&batch, &|r| r.last_address); + ip_ranges.extend(batch.iter().map(IpRange::from)); + } + + ip_ranges + }; + // The choice of which inventory collection to use here is not // necessarily trivial. Inventory collections may be incomplete due to // transient (or even persistent) errors. 
It's not yet clear what @@ -192,7 +217,11 @@ impl super::Nexus { "fetching latest inventory collection for blueprint planner", )?; - Ok(PlanningContext { creator, policy: Policy { sleds }, inventory }) + Ok(PlanningContext { + creator, + policy: Policy { sleds, service_ip_pool_ranges }, + inventory, + }) } async fn blueprint_add( @@ -252,7 +281,12 @@ impl super::Nexus { &planning_context.policy, &planning_context.creator, &inventory, - ); + ) + .map_err(|error| { + Error::internal_error(&format!( + "error creating blueprint planner: {error:#}", + )) + })?; let blueprint = planner.plan().map_err(|error| { Error::internal_error(&format!( "error generating blueprint: {}", diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 3b4c3b3142..936a9ae1a6 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -12,12 +12,15 @@ //! convenient to separate these concerns.) use crate::inventory::Collection; +pub use crate::inventory::NetworkInterface; +pub use crate::inventory::NetworkInterfaceKind; pub use crate::inventory::OmicronZoneConfig; pub use crate::inventory::OmicronZoneDataset; pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; +use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; @@ -43,13 +46,19 @@ use uuid::Uuid; /// /// The current policy is pretty limited. It's aimed primarily at supporting /// the add/remove sled use case. +#[derive(Debug, Clone)] pub struct Policy { /// set of sleds that are supposed to be part of the control plane, along /// with information about resources available to the planner pub sleds: BTreeMap, + + /// ranges specified by the IP pool for externally-visible control plane + /// services (e.g., external DNS, Nexus, boundary NTP) + pub service_ip_pool_ranges: Vec, } /// Describes the resources available on each sled for the planner +#[derive(Debug, Clone)] pub struct SledResources { /// zpools on this sled /// diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index c99e51af4f..50e8b380b3 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -48,7 +48,7 @@ use uuid::Uuid; /// database. /// /// See the documentation in the database schema for more background. 
-#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq, Clone)] pub struct Collection { /// unique identifier for this collection pub id: Uuid, From 617b5370b361f63951cdc2af7301202c009514e7 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 1 Feb 2024 16:15:55 -0500 Subject: [PATCH 02/12] fix broken tests --- nexus/db-queries/src/db/datastore/deployment.rs | 10 ++++++++-- nexus/deployment/src/planner.rs | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 72adb1d3df..e354d49c70 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1061,7 +1061,8 @@ mod tests { use std::mem; use std::net::Ipv6Addr; - static EMPTY_POLICY: Policy = Policy { sleds: BTreeMap::new() }; + static EMPTY_POLICY: Policy = + Policy { sleds: BTreeMap::new(), service_ip_pool_ranges: Vec::new() }; // This is a not-super-future-maintainer-friendly helper to check that all // the subtables related to blueprints have been pruned of a specific @@ -1131,6 +1132,7 @@ mod tests { ) }) .collect(), + service_ip_pool_ranges: Vec::new(), } } @@ -1320,7 +1322,8 @@ mod tests { // Create a builder for a child blueprint. let mut builder = - BlueprintBuilder::new_based_on(&blueprint1, &policy, "test"); + BlueprintBuilder::new_based_on(&blueprint1, &policy, "test") + .expect("failed to create builder"); // Add zones to our new sled. assert_eq!( @@ -1465,9 +1468,11 @@ mod tests { .unwrap(); let blueprint2 = BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test2") + .expect("failed to create builder") .build(); let blueprint3 = BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test3") + .expect("failed to create builder") .build(); assert_eq!(blueprint1.parent_blueprint_id, None); assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); @@ -1559,6 +1564,7 @@ mod tests { // with enabled=false, that status is serialized. 
let blueprint4 = BlueprintBuilder::new_based_on(&blueprint3, &EMPTY_POLICY, "test3") + .expect("failed to create builder") .build(); assert_eq!(blueprint4.parent_blueprint_id, Some(blueprint3.id)); datastore.blueprint_insert(&opctx, &blueprint4).await.unwrap(); diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index 5b6bd4d4d3..a6e775494a 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -265,6 +265,7 @@ mod test { "test: add nothing more", &collection, ) + .expect("failed to create planner") .plan() .expect("failed to plan"); let diff = blueprint3.diff(&blueprint4); From 029dd2bd4ec9fa148f483389356a689db1e8fef6 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 5 Feb 2024 09:43:36 -0500 Subject: [PATCH 03/12] allow blueprints to place multiple Nexuses on one sled --- Cargo.lock | 1 + nexus/deployment/src/blueprint_builder.rs | 199 +++++++--- nexus/deployment/src/default_service_count.rs | 8 + nexus/deployment/src/lib.rs | 5 +- nexus/deployment/src/planner.rs | 370 +++++++++++++++--- sled-agent/Cargo.toml | 1 + sled-agent/src/rack_setup/plan/service.rs | 6 +- 7 files changed, 472 insertions(+), 118 deletions(-) create mode 100644 nexus/deployment/src/default_service_count.rs diff --git a/Cargo.lock b/Cargo.lock index 99e9d5d109..bbe857e231 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5194,6 +5194,7 @@ dependencies = [ "macaddr", "mg-admin-client", "nexus-client", + "nexus-deployment", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 2d1e42a544..8625b90edc 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -70,6 +70,16 @@ pub enum Ensure { NotNeeded, } +/// Describes whether an idempotent "ensure" operation resulted in multiple +/// actions taken or no action was necessary +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum EnsureMultiple { + /// action was taken, and multiple items were added + Added(usize), + /// no action was necessary + NotNeeded, +} + /// Helper for assembling a blueprint /// /// There are two basic ways to assemble a new blueprint: @@ -455,22 +465,60 @@ impl<'a> BlueprintBuilder<'a> { Ok(Ensure::Added) } - pub fn sled_ensure_zone_nexus( + /// Return the number of Nexus zones that would be configured to run on the + /// given sled if this builder generated a blueprint + /// + /// This value may change before a blueprint is actually generated if + /// further changes are made to the builder. + pub fn sled_num_nexus_zones(&self, sled_id: Uuid) -> Result { + // Find the current config for this sled. + // + // Start with self.omicron_zones, which contains entries for any + // sled whose zones config is changing in this blueprint. + let Some(sled_config) = + self.omicron_zones.get(&sled_id).or_else(|| { + // If it's not there, use the config from the parent + // blueprint. + self.parent_blueprint.omicron_zones.get(&sled_id) + }) + else { + return Ok(0); + }; + + Ok(sled_config.zones.iter().filter(|z| z.zone_type.is_nexus()).count()) + } + + pub fn sled_ensure_zone_multiple_nexus( &mut self, sled_id: Uuid, - ) -> Result { - // If there's already a Nexus zone on this sled, do nothing. - let has_nexus = self + desired_zone_count: usize, + ) -> Result { + // How many Nexus zones are already running on this sled? 
+ let nexus_count = self .parent_blueprint .omicron_zones .get(&sled_id) .map(|found_zones| { - found_zones.zones.iter().any(|z| z.zone_type.is_nexus()) + found_zones + .zones + .iter() + .filter(|z| z.zone_type.is_nexus()) + .count() }) - .unwrap_or(false); - if has_nexus { - return Ok(Ensure::NotNeeded); - } + .unwrap_or(0); + + let num_nexus_to_add = match desired_zone_count.checked_sub(nexus_count) + { + Some(0) => return Ok(EnsureMultiple::NotNeeded), + Some(n) => n, + None => { + return Err(Error::Planner(anyhow!( + "removing a Nexus zone not yet supported \ + (sled {sled_id} has {nexus_count}; \ + planner wants {desired_zone_count})" + ))); + } + }; // Whether Nexus should use TLS and what the external DNS servers it // should use are currently provided at rack-setup time, and should be @@ -499,58 +547,62 @@ impl<'a> BlueprintBuilder<'a> { }) .ok_or(Error::NoNexusZonesInParentBlueprint)?; - let nexus_id = Uuid::new_v4(); - let external_ip = self - .available_external_ips - .next() - .ok_or(Error::NoExternalServiceIpAvailable)?; - - let nic = { - let (ip, subnet) = match external_ip { - IpAddr::V4(_) => ( - self.nexus_v4_ips - .next() - .ok_or(Error::ExhaustedNexusIps)? - .into(), - IpNet::from(*NEXUS_OPTE_IPV4_SUBNET).into(), - ), - IpAddr::V6(_) => ( - self.nexus_v6_ips - .next() - .ok_or(Error::ExhaustedNexusIps)? - .into(), - IpNet::from(*NEXUS_OPTE_IPV6_SUBNET).into(), - ), + for _ in 0..num_nexus_to_add { + let nexus_id = Uuid::new_v4(); + let external_ip = self + .available_external_ips + .next() + .ok_or(Error::NoExternalServiceIpAvailable)?; + + let nic = { + let (ip, subnet) = match external_ip { + IpAddr::V4(_) => ( + self.nexus_v4_ips + .next() + .ok_or(Error::ExhaustedNexusIps)? + .into(), + IpNet::from(*NEXUS_OPTE_IPV4_SUBNET).into(), + ), + IpAddr::V6(_) => ( + self.nexus_v6_ips + .next() + .ok_or(Error::ExhaustedNexusIps)? 
+ .into(), + IpNet::from(*NEXUS_OPTE_IPV6_SUBNET).into(), + ), + }; + NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(nexus_id), + name: format!("nexus-{nexus_id}").parse().unwrap(), + ip, + mac: self.random_mac(), + subnet, + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + } }; - NetworkInterface { - id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service(nexus_id), - name: format!("nexus-{nexus_id}").parse().unwrap(), - ip, - mac: self.random_mac(), - subnet, - vni: Vni::SERVICES_VNI, - primary: true, - slot: 0, - } - }; - let ip = self.sled_alloc_ip(sled_id)?; - let port = omicron_common::address::NEXUS_INTERNAL_PORT; - let internal_address = SocketAddrV6::new(ip, port, 0, 0).to_string(); - let zone = OmicronZoneConfig { - id: nexus_id, - underlay_address: ip, - zone_type: OmicronZoneType::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers, - }, - }; - self.sled_add_zone(sled_id, zone)?; - Ok(Ensure::Added) + let ip = self.sled_alloc_ip(sled_id)?; + let port = omicron_common::address::NEXUS_INTERNAL_PORT; + let internal_address = + SocketAddrV6::new(ip, port, 0, 0).to_string(); + let zone = OmicronZoneConfig { + id: nexus_id, + underlay_address: ip, + zone_type: OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers: external_dns_servers.clone(), + }, + }; + self.sled_add_zone(sled_id, zone)?; + } + + Ok(EnsureMultiple::Added(num_nexus_to_add)) } fn random_mac(&mut self) -> MacAddr { @@ -645,7 +697,7 @@ impl<'a> BlueprintBuilder<'a> { allocator }); - allocator.alloc().ok_or_else(|| Error::OutOfAddresses { sled_id }) + allocator.alloc().ok_or(Error::OutOfAddresses { sled_id }) } fn sled_resources(&self, sled_id: Uuid) -> Result<&SledResources, Error> { @@ -1012,13 +1064,14 @@ pub mod test { .expect("failed to create builder"); let err = builder - .sled_ensure_zone_nexus( + .sled_ensure_zone_multiple_nexus( collection .omicron_zones .keys() .next() .copied() .expect("no sleds present"), + 1, ) .unwrap_err(); @@ -1058,16 +1111,30 @@ pub mod test { .expect("failed to create initial blueprint"); { - // Attempting to add Nexus to the zone we removed it from (with no + // Attempting to add Nexus to the sled we removed it from (with no // other changes to the environment) should succeed. let mut builder = BlueprintBuilder::new_based_on(&parent, &policy, "test") .expect("failed to create builder"); let added = builder - .sled_ensure_zone_nexus(sled_id) + .sled_ensure_zone_multiple_nexus(sled_id, 1) + .expect("failed to ensure nexus zone"); + + assert_eq!(added, EnsureMultiple::Added(1)); + } + + { + // Attempting to add multiple Nexus zones to the sled we removed it + // from (with no other changes to the environment) should also + // succeed. 
+ let mut builder = + BlueprintBuilder::new_based_on(&parent, &policy, "test") + .expect("failed to create builder"); + let added = builder + .sled_ensure_zone_multiple_nexus(sled_id, 3) .expect("failed to ensure nexus zone"); - assert_eq!(added, Ensure::Added); + assert_eq!(added, EnsureMultiple::Added(3)); } { @@ -1093,7 +1160,9 @@ pub mod test { let mut builder = BlueprintBuilder::new_based_on(&parent, &policy, "test") .expect("failed to create builder"); - let err = builder.sled_ensure_zone_nexus(sled_id).unwrap_err(); + let err = builder + .sled_ensure_zone_multiple_nexus(sled_id, 1) + .unwrap_err(); assert!( matches!(err, Error::NoExternalServiceIpAvailable), diff --git a/nexus/deployment/src/default_service_count.rs b/nexus/deployment/src/default_service_count.rs new file mode 100644 index 0000000000..786c7dee7c --- /dev/null +++ b/nexus/deployment/src/default_service_count.rs @@ -0,0 +1,8 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Default count of zones that should be provisioned by RSS and ensured by +//! blueprints created by [`crate::Planner`]. + +pub const NEXUS: usize = 3; diff --git a/nexus/deployment/src/lib.rs b/nexus/deployment/src/lib.rs index fd182ae613..ef30e55ad0 100644 --- a/nexus/deployment/src/lib.rs +++ b/nexus/deployment/src/lib.rs @@ -57,7 +57,7 @@ //! The Planner //! //! fleet policy (latest inventory) (latest blueprint) -//! \ | / +//! \ | / //! \ | / //! +----------+ | +----------/ //! | | | @@ -85,7 +85,7 @@ //! The Executor (better name?) //! //! latest committed blueprint latest inventory -//! | | +//! | | //! | | //! +----+ +----+ //! | | @@ -118,3 +118,4 @@ pub mod blueprint_builder; mod ip_allocator; pub mod planner; +pub mod default_service_count; diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index a6e775494a..89639d8eac 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -8,11 +8,17 @@ use crate::blueprint_builder::BlueprintBuilder; use crate::blueprint_builder::Ensure; +use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; +use crate::default_service_count; use nexus_types::deployment::Blueprint; use nexus_types::deployment::Policy; use nexus_types::inventory::Collection; +use slog::warn; use slog::{info, Logger}; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use uuid::Uuid; pub struct Planner<'a> { log: Logger, @@ -28,6 +34,14 @@ pub struct Planner<'a> { // information about all sleds that we expect), we should verify that up // front and update callers to ensure that it's true. inventory: &'a Collection, + + // target number of Nexus zones that should be placed on sleds + // + // Currently, this is always set to `default_service_count::NEXUS` (i.e., + // the same number that RSS places), so we will only add a Nexus if a prior + // Nexus instance is removed. This is stored as a property to allow unit + // tests to tweak the value. 
+ target_num_nexus_zones: usize, } impl<'a> Planner<'a> { @@ -42,7 +56,13 @@ impl<'a> Planner<'a> { ) -> anyhow::Result> { let blueprint = BlueprintBuilder::new_based_on(parent_blueprint, policy, creator)?; - Ok(Planner { log, policy, blueprint, inventory }) + Ok(Planner { + log, + policy, + blueprint, + inventory, + target_num_nexus_zones: default_service_count::NEXUS, + }) } pub fn plan(mut self) -> Result { @@ -61,6 +81,17 @@ impl<'a> Planner<'a> { // added and where they should go. And the blueprint builder will need // to grow the ability to provision one. + // After we make our initial pass through the sleds below to check for + // zones every sled should have (NTP, Crucible), we'll start making + // decisions about placing other service zones. We need to _exclude_ any + // sleds for which we just added an NTP zone, as we won't be able to add + // additional services to them until that NTP zone has been brought up. + // + // We will not mark sleds getting Crucible zones as ineligible; other + // control plane service zones starting concurrently with Crucible zones + // is fine. + let mut sleds_ineligible_for_services = BTreeSet::new(); + for (sled_id, sled_info) in &self.policy.sleds { // Check for an NTP zone. Every sled should have one. If it's not // there, all we can do is provision that one zone. We have to wait @@ -70,13 +101,14 @@ impl<'a> Planner<'a> { info!( &self.log, "found sled missing NTP zone (will add one)"; - "sled_id" => ?sled_id + "sled_id" => %sled_id ); self.blueprint .comment(&format!("sled {}: add NTP zone", sled_id)); // Don't make any other changes to this sled. However, this // change is compatible with any other changes to other sleds, // so we can "continue" here rather than "break". + sleds_ineligible_for_services.insert(*sled_id); continue; } @@ -100,7 +132,7 @@ impl<'a> Planner<'a> { let has_ntp_inventory = self .inventory .omicron_zones - .get(&sled_id) + .get(sled_id) .map(|sled_zones| { sled_zones.zones.zones.iter().any(|z| z.zone_type.is_ntp()) }) @@ -110,7 +142,7 @@ impl<'a> Planner<'a> { &self.log, "parent blueprint contains NTP zone, but it's not in \ inventory yet"; - "sled_id" => ?sled_id, + "sled_id" => %sled_id, ); continue; } @@ -133,30 +165,7 @@ impl<'a> Planner<'a> { } } - // TODO XXX To test Nexus zone addition, attempt to ensure every - // sled has a Nexus zone. This is NOT a sensible policy, and should - // be removed before this lands on `main`. - // - // Adding a new Nexus currently requires the parent blueprint to - // have had at least one Nexus. By trying to ensure a Nexus on every - // sled, that implicitly requires every parent_blueprint we accept - // to have at least one Nexus in order to successfully call this - // method. That should be true in production and is currently true - // for all tests, but might be overly restrictive? We could change - // this to "if at least one Nexus exists, add a Nexus on every - // sled", but that seems like overkill for what should be a very - // short-lived policy. - // - // Ensuring a Nexus zone exists will also fail if there are no - // external IP addresses left. We should take that into account with - // whatever real logic we put here, too. - let nexus_added = - match self.blueprint.sled_ensure_zone_nexus(*sled_id)? { - Ensure::Added => true, - Ensure::NotNeeded => false, - }; - - if ncrucibles_added > 0 || nexus_added { + if ncrucibles_added > 0 { // Don't make any other changes to this sled. 
However, this // change is compatible with any other changes to other sleds, // so we can "continue" here rather than "break". @@ -168,6 +177,126 @@ impl<'a> Planner<'a> { } } + self.ensure_correct_number_of_nexus_zones( + &sleds_ineligible_for_services, + )?; + + Ok(()) + } + + fn ensure_correct_number_of_nexus_zones( + &mut self, + sleds_ineligible_for_services: &BTreeSet, + ) -> Result<(), Error> { + // Bin every sled by the number of Nexus zones it currently has while + // counting the total number of Nexus zones. + let mut num_total_nexus = 0; + let mut sleds_by_num_nexus: BTreeMap> = + BTreeMap::new(); + for &sled_id in self.policy.sleds.keys() { + let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id)?; + num_total_nexus += num_nexus; + + // Only bin this sled if we're allowed to use it. If we have a sled + // we're not allowed to use that's already running a Nexus (seems + // fishy!), we counted its Nexus above but will ignore it here. + if !sleds_ineligible_for_services.contains(&sled_id) { + sleds_by_num_nexus.entry(num_nexus).or_default().push(sled_id); + } + } + + // TODO-correctness What should we do if we have _too many_ Nexus + // instances? For now, just log it the number of zones any time we have + // at least the minimum number. + let nexus_to_add = + self.target_num_nexus_zones.saturating_sub(num_total_nexus); + if nexus_to_add == 0 { + info!( + self.log, "sufficient Nexus zones exist in plan"; + "desired_count" => self.target_num_nexus_zones, + "current_count" => num_total_nexus, + ); + return Ok(()); + } + + // Ensure we have at least one sled on which we can add Nexus zones. If + // we don't, we have nothing else to do. This isn't a hard error, + // because we might be waiting for NTP on all eligible sleds (although + // it would be weird, since we're presumably running from within Nexus + // on some sled). + if sleds_by_num_nexus.is_empty() { + warn!(self.log, "want to add Nexus zones, but no eligible sleds"); + return Ok(()); + } + + // Build a map of sled -> new nexus zone count. + let mut sleds_to_change: BTreeMap = BTreeMap::new(); + + 'outer: for _ in 0..nexus_to_add { + // `sleds_by_num_nexus` is sorted by key already, and we want to + // pick from the lowest-numbered bin. We can just loop over its + // keys, expecting to stop on the first iteration, with the only + // exception being when we've removed all the sleds from a bin. + for (&num_nexus, sleds) in sleds_by_num_nexus.iter_mut() { + // TODO choose more smartly than effectively ordering by + // sled_id? See + // https://github.com/oxidecomputer/omicron/pull/4959#discussion_r1476795735. + let Some(sled_id) = sleds.pop() else { + // We already drained this bin; move on. + continue; + }; + + // This insert might overwrite an old value for this sled (e.g., + // in the "we have 1 sled and need to add many Nexus instances + // to it" case). That's fine. + sleds_to_change.insert(sled_id, num_nexus + 1); + + // Put this sled back in our map, but now with one more Nexus. + sleds_by_num_nexus + .entry(num_nexus + 1) + .or_default() + .push(sled_id); + + continue 'outer; + } + + // This should be unreachable: it's only possible if we fail to find + // a nonempty vec in `sleds_by_num_nexus`, and we checked above that + // `sleds_by_num_nexus` is not empty. + unreachable!("logic error finding sleds for Nexus"); + } + + // For each sled we need to change, actually do so. 
+ let mut total_added = 0; + for (sled_id, new_nexus_count) in sleds_to_change { + match self + .blueprint + .sled_ensure_zone_multiple_nexus(sled_id, new_nexus_count)? + { + EnsureMultiple::Added(n) => { + info!( + self.log, "will add {n} Nexus zone(s) to sled"; + "sled_id" => %sled_id, + ); + total_added += n; + } + // This is only possible if we asked the sled to ensure the same + // number of zones it already has, but that's impossible based + // on the way we built up `sleds_to_change`. + EnsureMultiple::NotNeeded => unreachable!( + "sled on which we added Nexus zones did not add any" + ), + } + } + + // Double check that we didn't make any arithmetic mistakes. If we've + // arrived here, we think we've added the number of Nexus zones we + // needed to. + assert_eq!( + total_added, nexus_to_add, + "internal error counting Nexus zones" + ); + Ok(()) } } @@ -305,7 +434,7 @@ mod test { .expect("failed to plan"); let diff = blueprint3.diff(&blueprint5); - println!("3 -> 5 (expect 3 Crucible and 1 Nexus zones):\n{}", diff); + println!("3 -> 5 (expect Crucible zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); let sleds = diff.sleds_changed().collect::>(); @@ -319,28 +448,14 @@ mod test { assert_eq!(sled_changes.zones_removed().count(), 0); assert_eq!(sled_changes.zones_changed().count(), 0); let zones = sled_changes.zones_added().collect::>(); - let mut num_crucible_added = 0; - let mut num_nexus_added = 0; + assert_eq!(zones.len(), 3); for zone in &zones { - match zone.zone_type { - OmicronZoneType::Crucible { .. } => { - num_crucible_added += 1; - } - OmicronZoneType::Nexus { .. } => { - num_nexus_added += 1; - } - _ => panic!("unexpectedly added a non-Crucible zone: {zone:?}"), - } + let OmicronZoneType::Crucible { .. } = zone.zone_type else { + panic!("unexpectedly added a non-Crucible zone: {zone:?}"); + }; } - assert_eq!(num_crucible_added, 3); - assert_eq!(num_nexus_added, 1); // Check that there are no more steps. - // - // This also implicitly checks that the new Nexus zone added in - // blueprint4 did not reuse any resources from the existing Nexus zones - // (otherwise creation of the planner would fail due to an invalid - // parent blueprint). let blueprint6 = Planner::new_based_on( logctx.log.clone(), &blueprint5, @@ -360,4 +475,165 @@ mod test { logctx.cleanup_successful(); } + + /// Check that the planner will add more Nexus zones to a single sled, if + /// needed + #[test] + fn test_add_multiple_nexus_to_one_sled() { + let logctx = test_setup_log("planner_add_multiple_nexus_to_one_sled"); + + // Use our example inventory collection as a starting point, but strip + // it down to just one sled. + let (sled_id, collection, policy) = { + let (mut collection, mut policy) = example(); + + // Pick one sled ID to keep and remove the rest. + let keep_sled_id = + policy.sleds.keys().next().copied().expect("no sleds"); + policy.sleds.retain(|&k, _v| keep_sled_id == k); + collection.sled_agents.retain(|&k, _v| keep_sled_id == k); + collection.omicron_zones.retain(|&k, _v| keep_sled_id == k); + + assert_eq!(collection.sled_agents.len(), 1); + assert_eq!(collection.omicron_zones.len(), 1); + + (keep_sled_id, collection, policy) + }; + + // Build the initial blueprint. + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "the_test", + ) + .expect("failed to create initial blueprint"); + + // This blueprint should only have 1 Nexus instance on the one sled we + // kept. 
+ assert_eq!(blueprint1.omicron_zones.len(), 1); + assert_eq!( + blueprint1 + .omicron_zones + .get(&sled_id) + .expect("missing kept sled") + .zones + .iter() + .filter(|z| z.zone_type.is_nexus()) + .count(), + 1 + ); + + // Now run the planner. It should add additional Nexus instances to the + // one sled we have. + let target_num_nexus_zones = 5; + let mut planner2 = Planner::new_based_on( + logctx.log.clone(), + &blueprint1, + &policy, + "add more Nexus", + &collection, + ) + .expect("failed to create planner"); + planner2.target_num_nexus_zones = target_num_nexus_zones; + let blueprint2 = planner2.plan().expect("failed to plan"); + + let diff = blueprint1.diff(&blueprint2); + println!("1 -> 2 (added additional Nexus zones):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + let mut sleds = diff.sleds_changed().collect::>(); + assert_eq!(sleds.len(), 1); + let (changed_sled_id, sled_changes) = sleds.pop().unwrap(); + assert_eq!(changed_sled_id, sled_id); + assert_eq!(sled_changes.zones_removed().count(), 0); + assert_eq!(sled_changes.zones_changed().count(), 0); + let zones = sled_changes.zones_added().collect::>(); + assert_eq!(zones.len(), target_num_nexus_zones - 1); + for zone in &zones { + let OmicronZoneType::Nexus { .. } = zone.zone_type else { + panic!("unexpectedly added a non-Nexus zone: {zone:?}"); + }; + } + + logctx.cleanup_successful(); + } + + /// Check that the planner will spread additional Nexus zones out across + /// sleds as it adds them + #[test] + fn test_spread_additional_nexus_zones_across_sleds() { + let logctx = test_setup_log( + "planner_spread_additional_nexus_zones_across_sleds", + ); + + // Use our example inventory collection as a starting point. + let (collection, policy) = example(); + + // Build the initial blueprint. + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "the_test", + ) + .expect("failed to create initial blueprint"); + + // This blueprint should only have 3 Nexus zones: one on each sled. + assert_eq!(blueprint1.omicron_zones.len(), 3); + for sled_config in blueprint1.omicron_zones.values() { + assert_eq!( + sled_config + .zones + .iter() + .filter(|z| z.zone_type.is_nexus()) + .count(), + 1 + ); + } + + // Now run the planner with a high number of target Nexus zones. + let target_num_nexus_zones = 14; + let mut planner2 = Planner::new_based_on( + logctx.log.clone(), + &blueprint1, + &policy, + "add more Nexus", + &collection, + ) + .expect("failed to create planner"); + planner2.target_num_nexus_zones = target_num_nexus_zones; + let blueprint2 = planner2.plan().expect("failed to plan"); + + let diff = blueprint1.diff(&blueprint2); + println!("1 -> 2 (added additional Nexus zones):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + let sleds = diff.sleds_changed().collect::>(); + + // All 3 sleds should get additional Nexus zones. We expect a total of + // 11 new Nexus zones, which should be spread evenly across the three + // sleds (two should get 4 and one should get 3). 
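// (Worked check of that split, as an aside: the target is 14 and the parent
// blueprint already has 3 Nexus zones, so 11 must be added; handing them out
// one at a time to the sled with the fewest Nexus zones gives 4 + 4 + 3,
// since 11 = 3 * 3 + 2.)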
+ assert_eq!(sleds.len(), 3); + let mut total_new_nexus_zones = 0; + for (sled_id, sled_changes) in sleds { + assert_eq!(sled_changes.zones_removed().count(), 0); + assert_eq!(sled_changes.zones_changed().count(), 0); + let zones = sled_changes.zones_added().collect::>(); + match zones.len() { + n @ (3 | 4) => { + total_new_nexus_zones += n; + } + n => { + panic!("unexpected number of zones added to {sled_id}: {n}") + } + } + for zone in &zones { + let OmicronZoneType::Nexus { .. } = zone.zone_type else { + panic!("unexpectedly added a non-Crucible zone: {zone:?}"); + }; + } + } + assert_eq!(total_new_nexus_zones, 11); + + logctx.cleanup_successful(); + } } diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 5bd205b32e..860e118263 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -44,6 +44,7 @@ libc.workspace = true macaddr.workspace = true mg-admin-client.workspace = true nexus-client.workspace = true +nexus-deployment.workspace = true omicron-common.workspace = true once_cell.workspace = true oximeter.workspace = true diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index bed82a7a01..2a169082ed 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -12,6 +12,7 @@ use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; use internal_dns::config::{Host, ZoneVariant}; use internal_dns::ServiceName; +use nexus_deployment::default_service_count; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, @@ -44,9 +45,6 @@ use uuid::Uuid; // The number of boundary NTP servers to create from RSS. const BOUNDARY_NTP_COUNT: usize = 2; -// The number of Nexus instances to create from RSS. -const NEXUS_COUNT: usize = 3; - // The number of CRDB instances to create from RSS. const CRDB_COUNT: usize = 5; @@ -458,7 +456,7 @@ impl Plan { } // Provision Nexus zones, continuing to stripe across sleds. - for _ in 0..NEXUS_COUNT { + for _ in 0..default_service_count::NEXUS { let sled = { let which_sled = sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; From a3e0a51f6899b3688a8116e97a19165ccde972e0 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 5 Feb 2024 16:23:00 -0500 Subject: [PATCH 04/12] cleanup/typos from PR review --- nexus/deployment/src/blueprint_builder.rs | 58 +++++++++++------------ 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 8625b90edc..a9c00f642b 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -110,7 +110,7 @@ pub struct BlueprintBuilder<'a> { creator: String, comments: Vec, - // These fields emulate how RSS choses addresses for zone NICs. + // These fields mirror how RSS chooses addresses for zone NICs. nexus_v4_ips: Box + Send>, nexus_v6_ips: Box + Send>, @@ -215,38 +215,36 @@ impl<'a> BlueprintBuilder<'a> { // but expect this to be okay: we don't anticipate removal and addition // to frequently be combined into the exact same blueprint, particularly // in a way that expects the addition to reuse resources from the - // removal; we will won't want to attempt to reuse resources from a zone + // removal; we won't want to attempt to reuse resources from a zone // until we know it's been fully removed. 
let mut existing_nexus_v4_ips: HashSet = HashSet::new(); let mut existing_nexus_v6_ips: HashSet = HashSet::new(); let mut used_external_ips: HashSet = HashSet::new(); let mut used_macs: HashSet = HashSet::new(); - for zone_config in parent_blueprint.omicron_zones.values() { - for z in &zone_config.zones { - if let OmicronZoneType::Nexus { nic, .. } = &z.zone_type { - match nic.ip { - IpAddr::V4(ip) => { - if !existing_nexus_v4_ips.insert(ip) { - bail!("duplicate Nexus NIC IP: {ip}"); - } + for (_, z) in parent_blueprint.all_omicron_zones() { + if let OmicronZoneType::Nexus { nic, .. } = &z.zone_type { + match nic.ip { + IpAddr::V4(ip) => { + if !existing_nexus_v4_ips.insert(ip) { + bail!("duplicate Nexus NIC IP: {ip}"); } - IpAddr::V6(ip) => { - if !existing_nexus_v6_ips.insert(ip) { - bail!("duplicate Nexus NIC IP: {ip}"); - } + } + IpAddr::V6(ip) => { + if !existing_nexus_v6_ips.insert(ip) { + bail!("duplicate Nexus NIC IP: {ip}"); } } } - if let Some(external_ip) = z.zone_type.external_ip()? { - if !used_external_ips.insert(external_ip) { - bail!("duplicate external IP: {external_ip}"); - } + } + if let Some(external_ip) = z.zone_type.external_ip()? { + if !used_external_ips.insert(external_ip) { + bail!("duplicate external IP: {external_ip}"); } - if let Some(nic) = z.zone_type.service_vnic() { - if !used_macs.insert(nic.mac) { - bail!("duplicate service vNIC MAC: {}", nic.mac); - } + } + if let Some(nic) = z.zone_type.service_vnic() { + if !used_macs.insert(nic.mac) { + bail!("duplicate service vNIC MAC: {}", nic.mac); } } } @@ -1143,15 +1141,13 @@ pub mod test { // Nexus with no remaining external IPs should fail. let mut policy = policy.clone(); let mut used_ip_ranges = Vec::new(); - for zones in parent.omicron_zones.values() { - for z in &zones.zones { - if let Some(ip) = z - .zone_type - .external_ip() - .expect("failed to check for external IP") - { - used_ip_ranges.push(IpRange::from(ip)); - } + for (_, z) in parent.all_omicron_zones() { + if let Some(ip) = z + .zone_type + .external_ip() + .expect("failed to check for external IP") + { + used_ip_ranges.push(IpRange::from(ip)); } } assert!(!used_ip_ranges.is_empty()); From 7f2d8f2c6df4b3cb2f4762c17d90e6868ca42b24 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 5 Feb 2024 16:23:11 -0500 Subject: [PATCH 05/12] cargo fmt --- nexus/deployment/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/deployment/src/lib.rs b/nexus/deployment/src/lib.rs index ef30e55ad0..1e8ecd333d 100644 --- a/nexus/deployment/src/lib.rs +++ b/nexus/deployment/src/lib.rs @@ -116,6 +116,6 @@ //! updates, etc. 
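// -------------------------------------------------------------------------
// Aside on the blueprint_builder hunk above (illustrative sketch, not part
// of this patch): the refactor leans on `Blueprint::all_omicron_zones()`,
// whose definition is not shown in this series. Judging from the call sites,
// it presumably flattens the per-sled zone map, roughly along these lines;
// the real method's exact signature may differ.

impl Blueprint {
    /// Iterate over every Omicron zone in the blueprint, paired with the id
    /// of the sled that hosts it.
    pub fn all_omicron_zones(
        &self,
    ) -> impl Iterator<Item = (Uuid, &OmicronZoneConfig)> {
        self.omicron_zones.iter().flat_map(|(sled_id, config)| {
            config.zones.iter().map(move |z| (*sled_id, z))
        })
    }
}
// -------------------------------------------------------------------------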
pub mod blueprint_builder; +pub mod default_service_count; mod ip_allocator; pub mod planner; -pub mod default_service_count; From 38071d684f4f8787132ed19f0fe9e91d41f95988 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 5 Feb 2024 16:54:46 -0500 Subject: [PATCH 06/12] move target_nexus_zone_count from Planner to Policy --- nexus/deployment/src/blueprint_builder.rs | 1 + nexus/deployment/src/planner.rs | 47 ++++++++--------------- nexus/src/app/deployment.rs | 7 +++- nexus/types/src/deployment.rs | 7 ++++ 4 files changed, 30 insertions(+), 32 deletions(-) diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index a9c00f642b..f76fd274b2 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -740,6 +740,7 @@ pub mod test { ) .unwrap() .into()], + target_nexus_zone_count: 3, }; let mut service_ip_pool_range = policy.service_ip_pool_ranges[0].iter(); let mut nexus_nic_ips = NEXUS_OPTE_IPV4_SUBNET diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index 89639d8eac..2c4265f086 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -10,7 +10,6 @@ use crate::blueprint_builder::BlueprintBuilder; use crate::blueprint_builder::Ensure; use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; -use crate::default_service_count; use nexus_types::deployment::Blueprint; use nexus_types::deployment::Policy; use nexus_types::inventory::Collection; @@ -34,14 +33,6 @@ pub struct Planner<'a> { // information about all sleds that we expect), we should verify that up // front and update callers to ensure that it's true. inventory: &'a Collection, - - // target number of Nexus zones that should be placed on sleds - // - // Currently, this is always set to `default_service_count::NEXUS` (i.e., - // the same number that RSS places), so we will only add a Nexus if a prior - // Nexus instance is removed. This is stored as a property to allow unit - // tests to tweak the value. - target_num_nexus_zones: usize, } impl<'a> Planner<'a> { @@ -56,13 +47,7 @@ impl<'a> Planner<'a> { ) -> anyhow::Result> { let blueprint = BlueprintBuilder::new_based_on(parent_blueprint, policy, creator)?; - Ok(Planner { - log, - policy, - blueprint, - inventory, - target_num_nexus_zones: default_service_count::NEXUS, - }) + Ok(Planner { log, policy, blueprint, inventory }) } pub fn plan(mut self) -> Result { @@ -209,11 +194,11 @@ impl<'a> Planner<'a> { // instances? For now, just log it the number of zones any time we have // at least the minimum number. let nexus_to_add = - self.target_num_nexus_zones.saturating_sub(num_total_nexus); + self.policy.target_nexus_zone_count.saturating_sub(num_total_nexus); if nexus_to_add == 0 { info!( self.log, "sufficient Nexus zones exist in plan"; - "desired_count" => self.target_num_nexus_zones, + "desired_count" => self.policy.target_nexus_zone_count, "current_count" => num_total_nexus, ); return Ok(()); @@ -484,7 +469,7 @@ mod test { // Use our example inventory collection as a starting point, but strip // it down to just one sled. - let (sled_id, collection, policy) = { + let (sled_id, collection, mut policy) = { let (mut collection, mut policy) = example(); // Pick one sled ID to keep and remove the rest. @@ -525,17 +510,17 @@ mod test { // Now run the planner. It should add additional Nexus instances to the // one sled we have. 
- let target_num_nexus_zones = 5; - let mut planner2 = Planner::new_based_on( + policy.target_nexus_zone_count = 5; + let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, &policy, "add more Nexus", &collection, ) - .expect("failed to create planner"); - planner2.target_num_nexus_zones = target_num_nexus_zones; - let blueprint2 = planner2.plan().expect("failed to plan"); + .expect("failed to create planner") + .plan() + .expect("failed to plan"); let diff = blueprint1.diff(&blueprint2); println!("1 -> 2 (added additional Nexus zones):\n{}", diff); @@ -548,7 +533,7 @@ mod test { assert_eq!(sled_changes.zones_removed().count(), 0); assert_eq!(sled_changes.zones_changed().count(), 0); let zones = sled_changes.zones_added().collect::>(); - assert_eq!(zones.len(), target_num_nexus_zones - 1); + assert_eq!(zones.len(), policy.target_nexus_zone_count - 1); for zone in &zones { let OmicronZoneType::Nexus { .. } = zone.zone_type else { panic!("unexpectedly added a non-Nexus zone: {zone:?}"); @@ -567,7 +552,7 @@ mod test { ); // Use our example inventory collection as a starting point. - let (collection, policy) = example(); + let (collection, mut policy) = example(); // Build the initial blueprint. let blueprint1 = BlueprintBuilder::build_initial_from_collection( @@ -591,17 +576,17 @@ mod test { } // Now run the planner with a high number of target Nexus zones. - let target_num_nexus_zones = 14; - let mut planner2 = Planner::new_based_on( + policy.target_nexus_zone_count = 14; + let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, &policy, "add more Nexus", &collection, ) - .expect("failed to create planner"); - planner2.target_num_nexus_zones = target_num_nexus_zones; - let blueprint2 = planner2.plan().expect("failed to plan"); + .expect("failed to create planner") + .plan() + .expect("failed to plan"); let diff = blueprint1.diff(&blueprint2); println!("1 -> 2 (added additional Nexus zones):\n{}", diff); diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index c8eeeb0192..716123df35 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -8,6 +8,7 @@ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; +use nexus_deployment::default_service_count; use nexus_deployment::planner::Planner; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; @@ -219,7 +220,11 @@ impl super::Nexus { Ok(PlanningContext { creator, - policy: Policy { sleds, service_ip_pool_ranges }, + policy: Policy { + sleds, + service_ip_pool_ranges, + target_nexus_zone_count: default_service_count::NEXUS, + }, inventory, }) } diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 936a9ae1a6..c43cac50b6 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -55,6 +55,13 @@ pub struct Policy { /// ranges specified by the IP pool for externally-visible control plane /// services (e.g., external DNS, Nexus, boundary NTP) pub service_ip_pool_ranges: Vec, + + /// desired total number of deployed Nexus zones + // This number probably doesn't make sense alone once we get to a world + // where we're deploying multiple _versions_ of Nexus and need to ensure a + // minimum number in service at any given time, but for now all we know how + // to do is deploy a fixed number of all-the-same-version. 
+ pub target_nexus_zone_count: usize, } /// Describes the resources available on each sled for the planner From 87ba3876a987855ecab830a530ca2fa89558bcc2 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 5 Feb 2024 17:17:42 -0500 Subject: [PATCH 07/12] doc fix --- nexus/deployment/src/default_service_count.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/deployment/src/default_service_count.rs b/nexus/deployment/src/default_service_count.rs index 786c7dee7c..5881da6dc9 100644 --- a/nexus/deployment/src/default_service_count.rs +++ b/nexus/deployment/src/default_service_count.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Default count of zones that should be provisioned by RSS and ensured by -//! blueprints created by [`crate::Planner`]. +//! Default counts of Omicron service zones that should be provised by RSS and +//! the reconfiguration system. pub const NEXUS: usize = 3; From 97489400c6221be65d165ee400ad264dc9dfbda7 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 5 Feb 2024 17:18:00 -0500 Subject: [PATCH 08/12] switch from random to sequential mac addr allocation --- nexus/deployment/src/blueprint_builder.rs | 31 ++++++++++------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index f76fd274b2..3bf2f9c218 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -54,6 +54,8 @@ pub enum Error { NoNexusZonesInParentBlueprint, #[error("no external service IP addresses are available")] NoExternalServiceIpAvailable, + #[error("no system MAC addresses are available")] + NoSystemMacAddressAvailable, #[error("exhausted available Nexus IP addresses")] ExhaustedNexusIps, #[error("programming error in planner")] @@ -117,8 +119,8 @@ pub struct BlueprintBuilder<'a> { // Iterator of available external IPs for service zones available_external_ips: Box + Send + 'a>, - // All MACs used by NICs in zones of the blueprint we're building. 
- used_macs: HashSet, + // Iterator of available MAC addresses in the system address range + available_system_macs: Box>, } impl<'a> BlueprintBuilder<'a> { @@ -276,6 +278,9 @@ impl<'a> BlueprintBuilder<'a> { .flat_map(|r| r.iter()) .filter(move |ip| !used_external_ips.contains(ip)), ); + let available_system_macs = Box::new( + MacAddr::iter_system().filter(move |mac| !used_macs.contains(mac)), + ); Ok(BlueprintBuilder { parent_blueprint, @@ -288,7 +293,7 @@ impl<'a> BlueprintBuilder<'a> { nexus_v4_ips, nexus_v6_ips, available_external_ips, - used_macs, + available_system_macs, }) } @@ -569,12 +574,16 @@ impl<'a> BlueprintBuilder<'a> { IpNet::from(*NEXUS_OPTE_IPV6_SUBNET).into(), ), }; + let mac = self + .available_system_macs + .next() + .ok_or(Error::NoSystemMacAddressAvailable)?; NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service(nexus_id), name: format!("nexus-{nexus_id}").parse().unwrap(), ip, - mac: self.random_mac(), + mac, subnet, vni: Vni::SERVICES_VNI, primary: true, @@ -603,20 +612,6 @@ impl<'a> BlueprintBuilder<'a> { Ok(EnsureMultiple::Added(num_nexus_to_add)) } - fn random_mac(&mut self) -> MacAddr { - let mut mac = MacAddr::random_system(); - - // TODO-performance if the size of `used_macs` starts to approach some - // nontrivial fraction of the total random space, we could do something - // smarter here (e.g., start with a random mac then increment until we - // find an unused value). - while !self.used_macs.insert(mac) { - mac = MacAddr::random_system(); - } - - mac - } - fn sled_add_zone( &mut self, sled_id: Uuid, From e0278a52cec0b7f4a26ee0dc5eabb15060d95f05 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 5 Feb 2024 17:33:57 -0500 Subject: [PATCH 09/12] fix db tests --- nexus/db-queries/src/db/datastore/deployment.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index e354d49c70..7d0df4f532 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1061,8 +1061,11 @@ mod tests { use std::mem; use std::net::Ipv6Addr; - static EMPTY_POLICY: Policy = - Policy { sleds: BTreeMap::new(), service_ip_pool_ranges: Vec::new() }; + static EMPTY_POLICY: Policy = Policy { + sleds: BTreeMap::new(), + service_ip_pool_ranges: Vec::new(), + target_nexus_zone_count: 0, + }; // This is a not-super-future-maintainer-friendly helper to check that all // the subtables related to blueprints have been pruned of a specific @@ -1133,6 +1136,10 @@ mod tests { }) .collect(), service_ip_pool_ranges: Vec::new(), + target_nexus_zone_count: collection + .all_omicron_zones() + .filter(|z| z.zone_type.is_nexus()) + .count(), } } From d332154954d2efe30a937d79dcd5765d81ef5c47 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 8 Feb 2024 11:31:10 -0500 Subject: [PATCH 10/12] move NEXUS_REDUNDANCY from nexus-deployment to omicron-common to share with RSS --- Cargo.lock | 1 - common/src/address.rs | 6 ++++++ nexus/deployment/src/default_service_count.rs | 8 -------- nexus/deployment/src/lib.rs | 1 - nexus/src/app/deployment.rs | 4 ++-- sled-agent/Cargo.toml | 1 - sled-agent/src/rack_setup/plan/service.rs | 7 +++---- 7 files changed, 11 insertions(+), 17 deletions(-) delete mode 100644 nexus/deployment/src/default_service_count.rs diff --git a/Cargo.lock b/Cargo.lock index 00e075877d..5814dd101a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5278,7 +5278,6 @@ dependencies = [ "macaddr", 
"mg-admin-client", "nexus-client", - "nexus-deployment", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", diff --git a/common/src/address.rs b/common/src/address.rs index c26c6ff911..152fb9319e 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -24,6 +24,12 @@ pub const MAX_PORT: u16 = u16::MAX; /// minimum possible value for a tcp or udp port pub const MIN_PORT: u16 = u16::MIN; +/// The amount of redundancy for Nexus services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new Nexus zones) +pub const NEXUS_REDUNDANCY: usize = 3; + /// The amount of redundancy for internal DNS servers. /// /// Must be less than or equal to MAX_DNS_REDUNDANCY. diff --git a/nexus/deployment/src/default_service_count.rs b/nexus/deployment/src/default_service_count.rs deleted file mode 100644 index 5881da6dc9..0000000000 --- a/nexus/deployment/src/default_service_count.rs +++ /dev/null @@ -1,8 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Default counts of Omicron service zones that should be provised by RSS and -//! the reconfiguration system. - -pub const NEXUS: usize = 3; diff --git a/nexus/deployment/src/lib.rs b/nexus/deployment/src/lib.rs index 1e8ecd333d..546f2c1dc1 100644 --- a/nexus/deployment/src/lib.rs +++ b/nexus/deployment/src/lib.rs @@ -116,6 +116,5 @@ //! updates, etc. pub mod blueprint_builder; -pub mod default_service_count; mod ip_allocator; pub mod planner; diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 716123df35..5818d1aa12 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -8,7 +8,6 @@ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; -use nexus_deployment::default_service_count; use nexus_deployment::planner::Planner; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; @@ -21,6 +20,7 @@ use nexus_types::identity::Asset; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; +use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -223,7 +223,7 @@ impl super::Nexus { policy: Policy { sleds, service_ip_pool_ranges, - target_nexus_zone_count: default_service_count::NEXUS, + target_nexus_zone_count: NEXUS_REDUNDANCY, }, inventory, }) diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 860e118263..5bd205b32e 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -44,7 +44,6 @@ libc.workspace = true macaddr.workspace = true mg-admin-client.workspace = true nexus-client.workspace = true -nexus-deployment.workspace = true omicron-common.workspace = true once_cell.workspace = true oximeter.workspace = true diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 2a169082ed..10bfc34b26 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -12,12 +12,11 @@ use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; use internal_dns::config::{Host, 
ZoneVariant}; use internal_dns::ServiceName; -use nexus_deployment::default_service_count; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, - MGD_PORT, MGS_PORT, NTP_PORT, NUM_SOURCE_NAT_PORTS, RSS_RESERVED_ADDRESSES, - SLED_PREFIX, + MGD_PORT, MGS_PORT, NEXUS_REDUNDANCY, NTP_PORT, NUM_SOURCE_NAT_PORTS, + RSS_RESERVED_ADDRESSES, SLED_PREFIX, }; use omicron_common::api::external::{MacAddr, Vni}; use omicron_common::api::internal::shared::SwitchLocation; @@ -456,7 +455,7 @@ impl Plan { } // Provision Nexus zones, continuing to stripe across sleds. - for _ in 0..default_service_count::NEXUS { + for _ in 0..NEXUS_REDUNDANCY { let sled = { let which_sled = sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; From 7ae7461de9160a866ec1541f8eeb8707ae21fa66 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 8 Feb 2024 11:33:08 -0500 Subject: [PATCH 11/12] comment cleanup from PR review --- nexus/deployment/src/planner.rs | 5 ++--- nexus/types/src/deployment.rs | 4 ---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index 10303d223a..2c054b8cb5 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -223,9 +223,8 @@ impl<'a> Planner<'a> { // keys, expecting to stop on the first iteration, with the only // exception being when we've removed all the sleds from a bin. for (&num_nexus, sleds) in sleds_by_num_nexus.iter_mut() { - // TODO choose more smartly than effectively ordering by - // sled_id? See - // https://github.com/oxidecomputer/omicron/pull/4959#discussion_r1476795735. + // `sleds` contains all sleds with the minimum number of Nexus + // zones. Pick one arbitrarily but deterministically. let Some(sled_id) = sleds.pop() else { // We already drained this bin; move on. continue; diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 4798d54afa..7438e2199a 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -57,10 +57,6 @@ pub struct Policy { pub service_ip_pool_ranges: Vec, /// desired total number of deployed Nexus zones - // This number probably doesn't make sense alone once we get to a world - // where we're deploying multiple _versions_ of Nexus and need to ensure a - // minimum number in service at any given time, but for now all we know how - // to do is deploy a fixed number of all-the-same-version. 
pub target_nexus_zone_count: usize, } From b941323f6cae79eaaf3509173f3616f089a2b939 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 8 Feb 2024 15:40:50 -0500 Subject: [PATCH 12/12] blueprint planner: skip non-provisionable sleds (#5003) --- .../db-queries/src/db/datastore/deployment.rs | 7 +- nexus/deployment/src/blueprint_builder.rs | 10 +- nexus/deployment/src/planner.rs | 105 ++++++++++++++++++ nexus/src/app/deployment.rs | 6 +- nexus/types/src/deployment.rs | 4 + 5 files changed, 129 insertions(+), 3 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 7d0df4f532..cde6e7e8c6 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1053,6 +1053,7 @@ mod tests { use nexus_test_utils::db::test_setup_database; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; + use nexus_types::external_api::views::SledProvisionState; use nexus_types::inventory::Collection; use omicron_common::address::Ipv6Subnet; use omicron_test_utils::dev; @@ -1115,7 +1116,11 @@ mod tests { }) .collect(); let ip = ip.unwrap_or_else(|| thread_rng().gen::().into()); - SledResources { zpools, subnet: Ipv6Subnet::new(ip) } + SledResources { + provision_state: SledProvisionState::Provisionable, + zpools, + subnet: Ipv6Subnet::new(ip), + } } // Create a `Policy` that contains all the sleds found in `collection` diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 248615286d..1bf46d34b2 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -718,6 +718,7 @@ impl<'a> BlueprintZones<'a> { #[cfg(test)] pub mod test { use super::*; + use nexus_types::external_api::views::SledProvisionState; use omicron_common::address::IpRange; use omicron_common::address::Ipv4Range; use omicron_common::address::Ipv6Subnet; @@ -890,7 +891,14 @@ pub mod test { .collect(); let subnet = Ipv6Subnet::::new(sled_ip); - policy.sleds.insert(sled_id, SledResources { zpools, subnet }); + policy.sleds.insert( + sled_id, + SledResources { + provision_state: SledProvisionState::Provisionable, + zpools, + subnet, + }, + ); sled_ip } diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index 2c054b8cb5..cbdcfd80c0 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -12,6 +12,7 @@ use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; use nexus_types::deployment::Policy; +use nexus_types::external_api::views::SledProvisionState; use nexus_types::inventory::Collection; use slog::warn; use slog::{info, Logger}; @@ -162,6 +163,20 @@ impl<'a> Planner<'a> { } } + // We've now placed all the services that should always exist on all + // sleds. Before moving on to make decisions about placing services that + // are _not_ present on all sleds, check the provision state of all our + // sleds so we can avoid any non-provisionable sleds under the + // assumption that there is something amiss with them. 
+ sleds_ineligible_for_services.extend( + self.policy.sleds.iter().filter_map(|(sled_id, sled_info)| { + match sled_info.provision_state { + SledProvisionState::Provisionable => None, + SledProvisionState::NonProvisionable => Some(*sled_id), + } + }), + ); + self.ensure_correct_number_of_nexus_zones( &sleds_ineligible_for_services, )?; @@ -293,6 +308,7 @@ mod test { use crate::blueprint_builder::test::verify_blueprint; use crate::blueprint_builder::BlueprintBuilder; use nexus_inventory::now_db_precision; + use nexus_types::external_api::views::SledProvisionState; use nexus_types::inventory::OmicronZoneType; use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; @@ -627,4 +643,93 @@ mod test { logctx.cleanup_successful(); } + + /// Check that the planner will skip non-provisionable sleds when allocating + /// extra Nexus zones + #[test] + fn test_nexus_allocation_skips_nonprovisionable_sleds() { + let logctx = test_setup_log( + "planner_nexus_allocation_skips_nonprovisionable_sleds", + ); + + // Use our example inventory collection as a starting point. + let (collection, mut policy) = example(); + + // Build the initial blueprint. + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "the_test", + ) + .expect("failed to create initial blueprint"); + + // This blueprint should only have 3 Nexus zones: one on each sled. + assert_eq!(blueprint1.omicron_zones.len(), 3); + for sled_config in blueprint1.omicron_zones.values() { + assert_eq!( + sled_config + .zones + .iter() + .filter(|z| z.zone_type.is_nexus()) + .count(), + 1 + ); + } + + // Arbitrarily choose one of the sleds and mark it non-provisionable. + let nonprovisionable_sled_id = { + let (sled_id, resources) = + policy.sleds.iter_mut().next().expect("no sleds"); + resources.provision_state = SledProvisionState::NonProvisionable; + *sled_id + }; + + // Now run the planner with a high number of target Nexus zones. + policy.target_nexus_zone_count = 14; + let blueprint2 = Planner::new_based_on( + logctx.log.clone(), + &blueprint1, + &policy, + "add more Nexus", + &collection, + ) + .expect("failed to create planner") + .plan() + .expect("failed to plan"); + + let diff = blueprint1.diff(&blueprint2); + println!("1 -> 2 (added additional Nexus zones):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + let sleds = diff.sleds_changed().collect::>(); + + // Only 2 of the 3 sleds should get additional Nexus zones. We expect a + // total of 11 new Nexus zones, which should be spread evenly across the + // two sleds (one gets 6 and the other gets 5), while the + // non-provisionable sled should be unchanged. + assert_eq!(sleds.len(), 2); + let mut total_new_nexus_zones = 0; + for (sled_id, sled_changes) in sleds { + assert!(sled_id != nonprovisionable_sled_id); + assert_eq!(sled_changes.zones_removed().count(), 0); + assert_eq!(sled_changes.zones_changed().count(), 0); + let zones = sled_changes.zones_added().collect::>(); + match zones.len() { + n @ (5 | 6) => { + total_new_nexus_zones += n; + } + n => { + panic!("unexpected number of zones added to {sled_id}: {n}") + } + } + for zone in &zones { + let OmicronZoneType::Nexus { .. 
} = zone.zone_type else {
+                    panic!("unexpectedly added a non-Nexus zone: {zone:?}");
+                };
+            }
+        }
+        assert_eq!(total_new_nexus_zones, 11);
+
+        logctx.cleanup_successful();
+    }
 }
diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs
index 5818d1aa12..b8cb6deabf 100644
--- a/nexus/src/app/deployment.rs
+++ b/nexus/src/app/deployment.rs
@@ -171,7 +171,11 @@ impl super::Nexus {
                 let zpools = zpools_by_sled_id
                     .remove(&sled_id)
                     .unwrap_or_else(BTreeSet::new);
-                let sled_info = SledResources { subnet, zpools };
+                let sled_info = SledResources {
+                    provision_state: sled_row.provision_state().into(),
+                    subnet,
+                    zpools,
+                };
                 (sled_id, sled_info)
             })
             .collect();
diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs
index 7438e2199a..06427507d5 100644
--- a/nexus/types/src/deployment.rs
+++ b/nexus/types/src/deployment.rs
@@ -11,6 +11,7 @@
 //! nexus/deployment does not currently know about nexus/db-model and it's
 //! convenient to separate these concerns.)
 
+use crate::external_api::views::SledProvisionState;
 use crate::inventory::Collection;
 pub use crate::inventory::NetworkInterface;
 pub use crate::inventory::NetworkInterfaceKind;
@@ -63,6 +64,9 @@ pub struct Policy {
 /// Describes the resources available on each sled for the planner
 #[derive(Debug, Clone)]
 pub struct SledResources {
+    /// provision state of this sled
+    pub provision_state: SledProvisionState,
+
     /// zpools on this sled
     ///
     /// (used to allocate storage for control plane zones with persistent