diff --git a/Cargo.lock b/Cargo.lock index 5814dd101a..f0706a2cc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4305,6 +4305,7 @@ dependencies = [ "omicron-common", "omicron-nexus", "omicron-rpaths", + "omicron-test-utils", "omicron-workspace-hack", "pq-sys", "reqwest", diff --git a/nexus/blueprint-execution/Cargo.toml b/nexus/blueprint-execution/Cargo.toml index 11d8003599..cfdc455f36 100644 --- a/nexus/blueprint-execution/Cargo.toml +++ b/nexus/blueprint-execution/Cargo.toml @@ -9,8 +9,10 @@ omicron-rpaths.workspace = true [dependencies] anyhow.workspace = true futures.workspace = true +nexus-db-model.workspace = true nexus-db-queries.workspace = true nexus-types.workspace = true +omicron-common.workspace = true reqwest.workspace = true sled-agent-client.workspace = true slog.workspace = true @@ -31,4 +33,5 @@ nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true omicron-common.workspace = true omicron-nexus.workspace = true +omicron-test-utils.workspace = true tokio.workspace = true diff --git a/nexus/blueprint-execution/src/lib.rs b/nexus/blueprint-execution/src/lib.rs index f7bfd7d30c..e4f8a537c0 100644 --- a/nexus/blueprint-execution/src/lib.rs +++ b/nexus/blueprint-execution/src/lib.rs @@ -12,6 +12,7 @@ use nexus_types::deployment::Blueprint; use slog::o; mod omicron_zones; +mod resource_allocation; /// Make one attempt to realize the given blueprint, meaning to take actions to /// alter the real system to match the blueprint @@ -24,6 +25,16 @@ pub async fn realize_blueprint( blueprint: &Blueprint, ) -> Result<(), Vec> { let log = opctx.log.new(o!("comment" => blueprint.comment.clone())); + + resource_allocation::ensure_zone_resources_allocated( + &log, + opctx, + datastore, + &blueprint.omicron_zones, + ) + .await + .map_err(|err| vec![err])?; + omicron_zones::deploy_zones( &log, opctx, diff --git a/nexus/blueprint-execution/src/resource_allocation.rs b/nexus/blueprint-execution/src/resource_allocation.rs new file mode 100644 index 0000000000..92307af0e0 --- /dev/null +++ b/nexus/blueprint-execution/src/resource_allocation.rs @@ -0,0 +1,998 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Manges allocation of resources required for blueprint realization + +use anyhow::bail; +use anyhow::Context; +use nexus_db_model::IncompleteNetworkInterface; +use nexus_db_model::Name; +use nexus_db_model::SqlU16; +use nexus_db_model::VpcSubnet; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; +use nexus_db_queries::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; +use nexus_db_queries::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::NetworkInterface; +use nexus_types::deployment::NetworkInterfaceKind; +use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::OmicronZonesConfig; +use nexus_types::deployment::SourceNatConfig; +use omicron_common::api::external::IdentityMetadataCreateParams; +use slog::info; +use slog::warn; +use slog::Logger; +use std::collections::BTreeMap; +use std::net::IpAddr; +use std::net::SocketAddr; +use uuid::Uuid; + +pub(crate) async fn ensure_zone_resources_allocated( + log: &Logger, + opctx: &OpContext, + datastore: &DataStore, + zones: &BTreeMap, +) -> anyhow::Result<()> { + let allocator = ResourceAllocator { log, opctx, datastore }; + + for config in zones.values() { + for z in &config.zones { + match &z.zone_type { + OmicronZoneType::Nexus { external_ip, nic, .. } => { + allocator + .ensure_nexus_external_networking_allocated( + z.id, + *external_ip, + nic, + ) + .await?; + } + OmicronZoneType::ExternalDns { dns_address, nic, .. } => { + allocator + .ensure_external_dns_external_networking_allocated( + z.id, + dns_address, + nic, + ) + .await?; + } + OmicronZoneType::BoundaryNtp { snat_cfg, nic, .. } => { + allocator + .ensure_boundary_ntp_external_networking_allocated( + z.id, snat_cfg, nic, + ) + .await?; + } + OmicronZoneType::InternalNtp { .. } + | OmicronZoneType::Clickhouse { .. } + | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::CockroachDb { .. } + | OmicronZoneType::Crucible { .. } + | OmicronZoneType::CruciblePantry { .. } + | OmicronZoneType::InternalDns { .. } + | OmicronZoneType::Oximeter { .. } => (), + } + } + } + + Ok(()) +} + +struct ResourceAllocator<'a> { + log: &'a Logger, + opctx: &'a OpContext, + datastore: &'a DataStore, +} + +impl<'a> ResourceAllocator<'a> { + // Helper function to determine whether a given external IP address is + // already allocated to a specific service zone. + async fn is_external_ip_already_allocated( + &self, + zone_type: &'static str, + zone_id: Uuid, + external_ip: IpAddr, + port_range: Option<(u16, u16)>, + ) -> anyhow::Result { + let allocated_ips = self + .datastore + .service_lookup_external_ips(self.opctx, zone_id) + .await + .with_context(|| { + format!( + "failed to look up external IPs for {zone_type} {zone_id}" + ) + })?; + + if !allocated_ips.is_empty() { + // All the service zones that want external IP addresses only expect + // to have a single IP. This service already has (at least) one: + // make sure this list includes the one we want, or return an error. + for allocated_ip in &allocated_ips { + if allocated_ip.ip.ip() == external_ip + && port_range + .map(|(first, last)| { + allocated_ip.first_port == SqlU16(first) + && allocated_ip.last_port == SqlU16(last) + }) + .unwrap_or(true) + { + info!( + self.log, "found already-allocated external IP"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "ip" => %external_ip, + ); + return Ok(true); + } + } + + warn!( + self.log, "zone has unexpected IPs allocated"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "want_ip" => %external_ip, + "allocated_ips" => ?allocated_ips, + ); + bail!( + "zone {zone_id} already has {} non-matching IP(s) allocated", + allocated_ips.len() + ); + } + + info!( + self.log, "external IP allocation required for zone"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "ip" => %external_ip, + ); + + Ok(false) + } + + // Helper function to determine whether a given NIC is already allocated to + // a specific service zone. + async fn is_nic_already_allocated( + &self, + zone_type: &'static str, + zone_id: Uuid, + nic: &NetworkInterface, + ) -> anyhow::Result { + let allocated_nics = self + .datastore + .service_list_network_interfaces(self.opctx, zone_id) + .await + .with_context(|| { + format!("failed to look up NICs for {zone_type} {zone_id}") + })?; + + if !allocated_nics.is_empty() { + // All the service zones that want NICs only expect to have a single + // one. Bail out here if this zone already has one or more allocated + // NICs but not the one we think it needs. + // + // This doesn't check the allocated NIC's subnet against our NICs, + // because that would require an extra DB lookup. We'll assume if + // these main properties are correct, the subnet is too. + for allocated_nic in &allocated_nics { + if allocated_nic.ip.ip() == nic.ip + && *allocated_nic.mac == nic.mac + && allocated_nic.slot == i16::from(nic.slot) + && allocated_nic.primary == nic.primary + { + info!( + self.log, "found already-allocated NIC"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "nic" => ?allocated_nic, + ); + return Ok(true); + } + } + + warn!( + self.log, "zone has unexpected NICs allocated"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "want_nic" => ?nic, + "allocated_nics" => ?allocated_nics, + ); + + bail!( + "zone {zone_id} already has {} non-matching NIC(s) allocated", + allocated_nics.len() + ); + } + + info!( + self.log, "NIC allocation required for zone"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "nid" => ?nic, + ); + + Ok(false) + } + + // Nexus and ExternalDns both use non-SNAT service IPs; this method is used + // to allocate external networking for both of them. + async fn ensure_external_service_ip( + &self, + zone_type: &'static str, + service_id: Uuid, + external_ip: IpAddr, + ip_name: &Name, + ) -> anyhow::Result<()> { + // Only attempt to allocate `external_ip` if it isn't already assigned + // to this zone. + // + // Checking for the existing of the external IP and then creating it + // if not found inserts a classic TOCTOU race: what if another Nexus + // is running concurrently, we both check and see that the IP is not + // allocated, then both attempt to create it? We believe this is + // okay: the loser of the race (i.e., the one whose create tries to + // commit second) will fail to allocate the IP, which will bubble + // out and prevent realization of the current blueprint. That's + // exactly what we want if two Nexuses try to realize the same + // blueprint at the same time. + if self + .is_external_ip_already_allocated( + zone_type, + service_id, + external_ip, + None, + ) + .await? + { + return Ok(()); + } + let ip_id = Uuid::new_v4(); + let description = zone_type; + self.datastore + .allocate_explicit_service_ip( + self.opctx, + ip_id, + ip_name, + description, + service_id, + external_ip, + ) + .await + .with_context(|| { + format!( + "failed to allocate IP to {zone_type} {service_id}: \ + {external_ip}" + ) + })?; + + info!( + self.log, "successfully allocated external IP"; + "zone_type" => zone_type, + "zone_id" => %service_id, + "ip" => %external_ip, + "ip_id" => %ip_id, + ); + + Ok(()) + } + + // BoundaryNtp uses a SNAT service IPs; this method is similar to + // `ensure_external_service_ip` but accounts for that. + async fn ensure_external_service_snat_ip( + &self, + zone_type: &'static str, + service_id: Uuid, + snat: &SourceNatConfig, + ) -> anyhow::Result<()> { + // Only attempt to allocate `external_ip` if it isn't already assigned + // to this zone. + // + // This is subject to the same kind of TOCTOU race as described for IP + // allocation in `ensure_external_service_ip`, and we believe it's okay + // for the same reasons as described there. + if self + .is_external_ip_already_allocated( + zone_type, + service_id, + snat.ip, + Some((snat.first_port, snat.last_port)), + ) + .await? + { + return Ok(()); + } + + let ip_id = Uuid::new_v4(); + self.datastore + .allocate_explicit_service_snat_ip( + self.opctx, + ip_id, + service_id, + snat.ip, + (snat.first_port, snat.last_port), + ) + .await + .with_context(|| { + format!( + "failed to allocate snat IP to {zone_type} {service_id}: \ + {snat:?}" + ) + })?; + + info!( + self.log, "successfully allocated external SNAT IP"; + "zone_type" => zone_type, + "zone_id" => %service_id, + "snat" => ?snat, + "ip_id" => %ip_id, + ); + + Ok(()) + } + + // All service zones with external connectivity get service vNICs. + async fn ensure_service_nic( + &self, + zone_type: &'static str, + service_id: Uuid, + nic: &NetworkInterface, + nic_subnet: &VpcSubnet, + ) -> anyhow::Result<()> { + // We don't pass `nic.kind` into the database below, but instead + // explicitly call `service_create_network_interface`. Ensure this is + // indeed a service NIC. + match &nic.kind { + NetworkInterfaceKind::Instance { .. } => { + bail!("invalid NIC kind (expected service, got instance)") + } + NetworkInterfaceKind::Service { .. } => (), + } + + // Only attempt to allocate `nic` if it isn't already assigned to this + // zone. + // + // This is subject to the same kind of TOCTOU race as described for IP + // allocation in `ensure_external_service_ip`, and we believe it's okay + // for the same reasons as described there. + if self.is_nic_already_allocated(zone_type, service_id, nic).await? { + return Ok(()); + } + let nic_arg = IncompleteNetworkInterface::new_service( + nic.id, + service_id, + nic_subnet.clone(), + IdentityMetadataCreateParams { + name: nic.name.clone(), + description: format!("{zone_type} service vNIC"), + }, + Some(nic.ip), + Some(nic.mac), + ) + .with_context(|| { + format!( + "failed to convert NIC into IncompleteNetworkInterface: {nic:?}" + ) + })?; + let created_nic = self + .datastore + .service_create_network_interface(self.opctx, nic_arg) + .await + .map_err(|err| err.into_external()) + .with_context(|| { + format!( + "failed to allocate NIC to {zone_type} {service_id}: \ + {nic:?}" + ) + })?; + + // We don't pass all the properties of `nic` into the create request + // above. Double-check that the properties the DB assigned match + // what we expect. + // + // We do not check `nic.vni`, because it's not stored in the + // database. (All services are given the constant vni + // `Vni::SERVICES_VNI`.) + if created_nic.primary != nic.primary + || created_nic.slot != i16::from(nic.slot) + { + warn!( + self.log, "unexpected property on allocated NIC"; + "db_primary" => created_nic.primary, + "expected_primary" => nic.primary, + "db_slot" => created_nic.slot, + "expected_slot" => nic.slot, + ); + + // Now what? We've allocated a NIC in the database but it's + // incorrect. Should we try to delete it? That would be best + // effort (we could fail to delete, or we could crash between + // creation and deletion). + // + // We only expect services to have one NIC, so the only way it + // should be possible to get a different primary/slot value is + // if somehow this same service got a _different_ NIC allocated + // to it in the TOCTOU race window above. That should be + // impossible with the way we generate blueprints, so we'll just + // return a scary error here and expect to never see it. + bail!( + "database cleanup required: \ + unexpected NIC ({created_nic:?}) \ + allocated for {zone_type} {service_id}" + ); + } + + info!( + self.log, "successfully allocated service vNIC"; + "zone_type" => zone_type, + "zone_id" => %service_id, + "nic" => ?nic, + ); + + Ok(()) + } + + async fn ensure_nexus_external_networking_allocated( + &self, + zone_id: Uuid, + external_ip: IpAddr, + nic: &NetworkInterface, + ) -> anyhow::Result<()> { + self.ensure_external_service_ip( + "nexus", + zone_id, + external_ip, + &Name(nic.name.clone()), + ) + .await?; + self.ensure_service_nic("nexus", zone_id, nic, &NEXUS_VPC_SUBNET) + .await?; + Ok(()) + } + + async fn ensure_external_dns_external_networking_allocated( + &self, + zone_id: Uuid, + dns_address: &str, + nic: &NetworkInterface, + ) -> anyhow::Result<()> { + let dns_address = + dns_address.parse::().with_context(|| { + format!("failed to parse ExternalDns address {dns_address}") + })?; + self.ensure_external_service_ip( + "external_dns", + zone_id, + dns_address.ip(), + &Name(nic.name.clone()), + ) + .await?; + self.ensure_service_nic("external_dns", zone_id, nic, &DNS_VPC_SUBNET) + .await?; + Ok(()) + } + + async fn ensure_boundary_ntp_external_networking_allocated( + &self, + zone_id: Uuid, + snat: &SourceNatConfig, + nic: &NetworkInterface, + ) -> anyhow::Result<()> { + self.ensure_external_service_snat_ip("ntp", zone_id, snat).await?; + self.ensure_service_nic("ntp", zone_id, nic, &NTP_VPC_SUBNET).await?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nexus_test_utils_macros::nexus_test; + use nexus_types::deployment::OmicronZoneConfig; + use nexus_types::deployment::OmicronZoneDataset; + use nexus_types::identity::Resource; + use omicron_common::address::IpRange; + use omicron_common::address::DNS_OPTE_IPV4_SUBNET; + use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; + use omicron_common::address::NTP_OPTE_IPV4_SUBNET; + use omicron_common::address::NUM_SOURCE_NAT_PORTS; + use omicron_common::api::external::Generation; + use omicron_common::api::external::IpNet; + use omicron_common::api::external::MacAddr; + use omicron_common::api::external::Vni; + use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; + use std::net::IpAddr; + use std::net::Ipv6Addr; + use std::net::SocketAddrV6; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test] + async fn test_allocate_external_networking( + cptestctx: &ControlPlaneTestContext, + ) { + // Set up. + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let log = &opctx.log; + + // Create an external IP range we can use for our services. + let external_ip_range = IpRange::try_from(( + "192.0.2.1".parse::().unwrap(), + "192.0.2.100".parse::().unwrap(), + )) + .expect("bad IP range"); + let mut external_ips = external_ip_range.iter(); + + // Add the external IP range to the services IP pool. + let (ip_pool, _) = datastore + .ip_pools_service_lookup(&opctx) + .await + .expect("failed to find service IP pool"); + datastore + .ip_pool_add_range(&opctx, &ip_pool, &external_ip_range) + .await + .expect("failed to expand service IP pool"); + + // Generate the values we care about. (Other required zone config params + // that we don't care about will be filled in below arbitrarily.) + + // Nexus: + let nexus_id = Uuid::new_v4(); + let nexus_external_ip = + external_ips.next().expect("exhausted external_ips"); + let nexus_nic = NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(nexus_id), + name: "test-nexus".parse().expect("bad name"), + ip: NEXUS_OPTE_IPV4_SUBNET + .iter() + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .unwrap() + .into(), + mac: MacAddr::random_system(), + subnet: IpNet::from(*NEXUS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }; + + // External DNS: + let dns_id = Uuid::new_v4(); + let dns_external_ip = + external_ips.next().expect("exhausted external_ips"); + let dns_nic = NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(dns_id), + name: "test-external-dns".parse().expect("bad name"), + ip: DNS_OPTE_IPV4_SUBNET + .iter() + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .unwrap() + .into(), + mac: MacAddr::random_system(), + subnet: IpNet::from(*DNS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }; + + // Boundary NTP: + let ntp_id = Uuid::new_v4(); + let ntp_snat = SourceNatConfig { + ip: external_ips.next().expect("exhausted external_ips"), + first_port: NUM_SOURCE_NAT_PORTS, + last_port: 2 * NUM_SOURCE_NAT_PORTS - 1, + }; + let ntp_nic = NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(ntp_id), + name: "test-external-ntp".parse().expect("bad name"), + ip: NTP_OPTE_IPV4_SUBNET + .iter() + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .unwrap() + .into(), + mac: MacAddr::random_system(), + subnet: IpNet::from(*NTP_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }; + + // Build the `zones` map needed by `ensure_zone_resources_allocated`, + // with an arbitrary sled_id. + let mut zones = BTreeMap::new(); + let sled_id = Uuid::new_v4(); + zones.insert( + sled_id, + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![ + OmicronZoneConfig { + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Nexus { + internal_address: Ipv6Addr::LOCALHOST.to_string(), + external_ip: nexus_external_ip, + nic: nexus_nic.clone(), + external_tls: false, + external_dns_servers: Vec::new(), + }, + }, + OmicronZoneConfig { + id: dns_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .expect("bad name"), + }, + http_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ) + .to_string(), + dns_address: SocketAddr::new(dns_external_ip, 0) + .to_string(), + nic: dns_nic.clone(), + }, + }, + OmicronZoneConfig { + id: ntp_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::BoundaryNtp { + address: SocketAddr::new(dns_external_ip, 0) + .to_string(), + ntp_servers: Vec::new(), + dns_servers: Vec::new(), + domain: None, + nic: ntp_nic.clone(), + snat_cfg: ntp_snat, + }, + }, + ], + }, + ); + + // Initialize resource allocation: this should succeed and create all + // the relevant db records. + ensure_zone_resources_allocated(log, &opctx, datastore, &zones) + .await + .with_context(|| format!("{zones:#?}")) + .unwrap(); + + // Check that the external IP records were created. + let db_nexus_ips = datastore + .service_lookup_external_ips(&opctx, nexus_id) + .await + .expect("failed to get external IPs"); + assert_eq!(db_nexus_ips.len(), 1); + assert!(db_nexus_ips[0].is_service); + assert_eq!(db_nexus_ips[0].parent_id, Some(nexus_id)); + assert_eq!(db_nexus_ips[0].ip, nexus_external_ip.into()); + assert_eq!(db_nexus_ips[0].first_port, SqlU16(0)); + assert_eq!(db_nexus_ips[0].last_port, SqlU16(65535)); + + let db_dns_ips = datastore + .service_lookup_external_ips(&opctx, dns_id) + .await + .expect("failed to get external IPs"); + assert_eq!(db_dns_ips.len(), 1); + assert!(db_dns_ips[0].is_service); + assert_eq!(db_dns_ips[0].parent_id, Some(dns_id)); + assert_eq!(db_dns_ips[0].ip, dns_external_ip.into()); + assert_eq!(db_dns_ips[0].first_port, SqlU16(0)); + assert_eq!(db_dns_ips[0].last_port, SqlU16(65535)); + + let db_ntp_ips = datastore + .service_lookup_external_ips(&opctx, ntp_id) + .await + .expect("failed to get external IPs"); + assert_eq!(db_ntp_ips.len(), 1); + assert!(db_ntp_ips[0].is_service); + assert_eq!(db_ntp_ips[0].parent_id, Some(ntp_id)); + assert_eq!(db_ntp_ips[0].ip, ntp_snat.ip.into()); + assert_eq!(db_ntp_ips[0].first_port, SqlU16(ntp_snat.first_port)); + assert_eq!(db_ntp_ips[0].last_port, SqlU16(ntp_snat.last_port)); + + // Check that the NIC records were created. + let db_nexus_nics = datastore + .service_list_network_interfaces(&opctx, nexus_id) + .await + .expect("failed to get NICs"); + assert_eq!(db_nexus_nics.len(), 1); + assert_eq!(db_nexus_nics[0].id(), nexus_nic.id); + assert_eq!(db_nexus_nics[0].service_id, nexus_id); + assert_eq!(db_nexus_nics[0].vpc_id, NEXUS_VPC_SUBNET.vpc_id); + assert_eq!(db_nexus_nics[0].subnet_id, NEXUS_VPC_SUBNET.id()); + assert_eq!(*db_nexus_nics[0].mac, nexus_nic.mac); + assert_eq!(db_nexus_nics[0].ip, nexus_nic.ip.into()); + assert_eq!(db_nexus_nics[0].slot, i16::from(nexus_nic.slot)); + assert_eq!(db_nexus_nics[0].primary, nexus_nic.primary); + + let db_dns_nics = datastore + .service_list_network_interfaces(&opctx, dns_id) + .await + .expect("failed to get NICs"); + assert_eq!(db_dns_nics.len(), 1); + assert_eq!(db_dns_nics[0].id(), dns_nic.id); + assert_eq!(db_dns_nics[0].service_id, dns_id); + assert_eq!(db_dns_nics[0].vpc_id, DNS_VPC_SUBNET.vpc_id); + assert_eq!(db_dns_nics[0].subnet_id, DNS_VPC_SUBNET.id()); + assert_eq!(*db_dns_nics[0].mac, dns_nic.mac); + assert_eq!(db_dns_nics[0].ip, dns_nic.ip.into()); + assert_eq!(db_dns_nics[0].slot, i16::from(dns_nic.slot)); + assert_eq!(db_dns_nics[0].primary, dns_nic.primary); + + let db_ntp_nics = datastore + .service_list_network_interfaces(&opctx, ntp_id) + .await + .expect("failed to get NICs"); + assert_eq!(db_ntp_nics.len(), 1); + assert_eq!(db_ntp_nics[0].id(), ntp_nic.id); + assert_eq!(db_ntp_nics[0].service_id, ntp_id); + assert_eq!(db_ntp_nics[0].vpc_id, NTP_VPC_SUBNET.vpc_id); + assert_eq!(db_ntp_nics[0].subnet_id, NTP_VPC_SUBNET.id()); + assert_eq!(*db_ntp_nics[0].mac, ntp_nic.mac); + assert_eq!(db_ntp_nics[0].ip, ntp_nic.ip.into()); + assert_eq!(db_ntp_nics[0].slot, i16::from(ntp_nic.slot)); + assert_eq!(db_ntp_nics[0].primary, ntp_nic.primary); + + // We should be able to run the function again with the same inputs, and + // it should succeed without inserting any new records. + ensure_zone_resources_allocated(log, &opctx, datastore, &zones) + .await + .with_context(|| format!("{zones:#?}")) + .unwrap(); + assert_eq!( + db_nexus_ips, + datastore + .service_lookup_external_ips(&opctx, nexus_id) + .await + .expect("failed to get external IPs") + ); + assert_eq!( + db_dns_ips, + datastore + .service_lookup_external_ips(&opctx, dns_id) + .await + .expect("failed to get external IPs") + ); + assert_eq!( + db_ntp_ips, + datastore + .service_lookup_external_ips(&opctx, ntp_id) + .await + .expect("failed to get external IPs") + ); + assert_eq!( + db_nexus_nics, + datastore + .service_list_network_interfaces(&opctx, nexus_id) + .await + .expect("failed to get NICs") + ); + assert_eq!( + db_dns_nics, + datastore + .service_list_network_interfaces(&opctx, dns_id) + .await + .expect("failed to get NICs") + ); + assert_eq!( + db_ntp_nics, + datastore + .service_list_network_interfaces(&opctx, ntp_id) + .await + .expect("failed to get NICs") + ); + + // Now that we've tested the happy path, try some requests that ought to + // fail because the request includes an external IP that doesn't match + // the already-allocated external IPs from above. + let bogus_ip = external_ips.next().expect("exhausted external_ips"); + for mutate_zones_fn in [ + // non-matching IP on Nexus + (&|config: &mut OmicronZonesConfig| { + for zone in &mut config.zones { + if let OmicronZoneType::Nexus { + ref mut external_ip, .. + } = &mut zone.zone_type + { + *external_ip = bogus_ip; + return format!( + "zone {} already has 1 non-matching IP", + zone.id + ); + } + } + panic!("didn't find expected zone"); + }) as &dyn Fn(&mut OmicronZonesConfig) -> String, + // non-matching IP on External DNS + &|config| { + for zone in &mut config.zones { + if let OmicronZoneType::ExternalDns { + ref mut dns_address, + .. + } = &mut zone.zone_type + { + *dns_address = SocketAddr::new(bogus_ip, 0).to_string(); + return format!( + "zone {} already has 1 non-matching IP", + zone.id + ); + } + } + panic!("didn't find expected zone"); + }, + // non-matching SNAT port range on Boundary NTP + &|config| { + for zone in &mut config.zones { + if let OmicronZoneType::BoundaryNtp { + ref mut snat_cfg, + .. + } = &mut zone.zone_type + { + snat_cfg.first_port += NUM_SOURCE_NAT_PORTS; + snat_cfg.last_port += NUM_SOURCE_NAT_PORTS; + return format!( + "zone {} already has 1 non-matching IP", + zone.id + ); + } + } + panic!("didn't find expected zone"); + }, + ] { + // Run `mutate_zones_fn` on our config... + let mut config = + zones.remove(&sled_id).expect("missing zone config"); + let orig_config = config.clone(); + let expected_error = mutate_zones_fn(&mut config); + zones.insert(sled_id, config); + + // ... check that we get the error we expect + let err = + ensure_zone_resources_allocated(log, &opctx, datastore, &zones) + .await + .expect_err("unexpected success"); + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + // ... and restore the original, valid config before iterating. + zones.insert(sled_id, orig_config); + } + + // Also try some requests that ought to fail because the request + // includes a NIC that doesn't match the already-allocated NICs from + // above. + // + // All three zone types have a `nic` property, so here our mutating + // function only modifies that, and the body of our loop tries it on all + // three to ensure we get the errors we expect no matter the zone type. + for mutate_nic_fn in [ + // switch kind from Service to Instance + (&|_: Uuid, nic: &mut NetworkInterface| { + match &nic.kind { + NetworkInterfaceKind::Instance { .. } => { + panic!( + "invalid NIC kind (expected service, got instance)" + ) + } + NetworkInterfaceKind::Service(id) => { + let id = *id; + nic.kind = NetworkInterfaceKind::Instance(id); + } + } + "invalid NIC kind".to_string() + }) as &dyn Fn(Uuid, &mut NetworkInterface) -> String, + // non-matching IP + &|zone_id, nic| { + nic.ip = bogus_ip; + format!("zone {zone_id} already has 1 non-matching NIC") + }, + ] { + // Try this NIC mutation on Nexus... + let mut mutated_zones = zones.clone(); + for zone in &mut mutated_zones + .get_mut(&sled_id) + .expect("missing sled") + .zones + { + if let OmicronZoneType::Nexus { ref mut nic, .. } = + &mut zone.zone_type + { + let expected_error = mutate_nic_fn(zone.id, nic); + + let err = ensure_zone_resources_allocated( + log, + &opctx, + datastore, + &mutated_zones, + ) + .await + .expect_err("unexpected success"); + + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + break; + } + } + + // ... and again on ExternalDns + let mut mutated_zones = zones.clone(); + for zone in &mut mutated_zones + .get_mut(&sled_id) + .expect("missing sled") + .zones + { + if let OmicronZoneType::ExternalDns { ref mut nic, .. } = + &mut zone.zone_type + { + let expected_error = mutate_nic_fn(zone.id, nic); + + let err = ensure_zone_resources_allocated( + log, + &opctx, + datastore, + &mutated_zones, + ) + .await + .expect_err("unexpected success"); + + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + break; + } + } + + // ... and again on BoundaryNtp + let mut mutated_zones = zones.clone(); + for zone in &mut mutated_zones + .get_mut(&sled_id) + .expect("missing sled") + .zones + { + if let OmicronZoneType::BoundaryNtp { ref mut nic, .. } = + &mut zone.zone_type + { + let expected_error = mutate_nic_fn(zone.id, nic); + + let err = ensure_zone_resources_allocated( + log, + &opctx, + datastore, + &mutated_zones, + ) + .await + .expect_err("unexpected success"); + + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + break; + } + } + } + } +} diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index 3d3fabbe66..5122bcf11d 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -84,7 +84,7 @@ pub struct InstanceNetworkInterface { /// The underlying "table" (`service_network_interface`) is actually a view /// over the `network_interface` table, that contains only rows with /// `kind = 'service'`. -#[derive(Selectable, Queryable, Clone, Debug, Resource)] +#[derive(Selectable, Queryable, Clone, Debug, PartialEq, Eq, Resource)] #[diesel(table_name = service_network_interface)] pub struct ServiceNetworkInterface { #[diesel(embed)]