From 1e76acade0bcac1415e8709ec52d23515164489c Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 20 Feb 2024 18:44:12 -0800 Subject: [PATCH] update internal DNS during blueprint execution (#4989) --- Cargo.lock | 8 + clients/dns-service-client/Cargo.toml | 1 + clients/dns-service-client/src/diff.rs | 230 ++++++ clients/dns-service-client/src/lib.rs | 31 +- common/src/api/external/mod.rs | 6 + dev-tools/omdb/src/bin/omdb/nexus.rs | 2 +- internal-dns/src/config.rs | 102 ++- internal-dns/src/names.rs | 6 - nexus/blueprint-execution/Cargo.toml | 11 +- nexus/blueprint-execution/src/dns.rs | 745 ++++++++++++++++++ nexus/blueprint-execution/src/lib.rs | 76 +- .../blueprint-execution/src/omicron_zones.rs | 192 ++--- nexus/db-model/src/deployment.rs | 3 + nexus/db-model/src/schema.rs | 4 +- nexus/db-queries/src/context.rs | 59 ++ .../db-queries/src/db/datastore/deployment.rs | 67 +- nexus/db-queries/src/db/datastore/dns.rs | 277 ++++++- nexus/db-queries/src/db/datastore/mod.rs | 9 + nexus/db-queries/src/db/datastore/silo.rs | 5 +- nexus/db-queries/src/db/datastore/sled.rs | 76 ++ nexus/deployment/src/blueprint_builder.rs | 109 ++- nexus/deployment/src/planner.rs | 47 +- .../src/app/background/blueprint_execution.rs | 71 +- nexus/src/app/background/blueprint_load.rs | 2 + nexus/src/app/background/init.rs | 3 +- nexus/src/app/deployment.rs | 35 +- nexus/test-utils/src/lib.rs | 23 - nexus/types/src/deployment.rs | 18 +- nexus/types/src/inventory.rs | 8 + openapi/nexus-internal.json | 18 + schema/crdb/36.0.0/up1.sql | 8 + schema/crdb/36.0.0/up2.sql | 2 + schema/crdb/dbinit.sql | 7 +- sled-agent/src/rack_setup/plan/service.rs | 95 ++- 34 files changed, 1975 insertions(+), 381 deletions(-) create mode 100644 clients/dns-service-client/src/diff.rs create mode 100644 nexus/blueprint-execution/src/dns.rs create mode 100644 schema/crdb/36.0.0/up1.sql create mode 100644 schema/crdb/36.0.0/up2.sql diff --git a/Cargo.lock b/Cargo.lock index bcbcb3e9c8..e15afdfbab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1887,6 +1887,7 @@ dependencies = [ name = "dns-service-client" version = "0.1.0" dependencies = [ + "anyhow", "chrono", "http 0.2.11", "omicron-workspace-hack", @@ -4267,21 +4268,28 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", + "dns-service-client", "futures", "httptest", + "internal-dns", + "ipnet", "nexus-db-model", "nexus-db-queries", + "nexus-deployment", + "nexus-inventory", "nexus-test-utils", "nexus-test-utils-macros", "nexus-types", "omicron-common", "omicron-nexus", "omicron-rpaths", + "omicron-test-utils", "omicron-workspace-hack", "pq-sys", "reqwest", "sled-agent-client", "slog", + "slog-error-chain", "tokio", "uuid", ] diff --git a/clients/dns-service-client/Cargo.toml b/clients/dns-service-client/Cargo.toml index 6132222b8a..27ffb66d88 100644 --- a/clients/dns-service-client/Cargo.toml +++ b/clients/dns-service-client/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] +anyhow.workspace = true chrono.workspace = true http.workspace = true progenitor.workspace = true diff --git a/clients/dns-service-client/src/diff.rs b/clients/dns-service-client/src/diff.rs new file mode 100644 index 0000000000..39d51cc974 --- /dev/null +++ b/clients/dns-service-client/src/diff.rs @@ -0,0 +1,230 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::types::DnsConfigParams; +use crate::types::DnsRecord; +use crate::DnsRecords; +use anyhow::ensure; +use anyhow::Context; + +/// Compare the DNS records contained in two sets of DNS configuration +#[derive(Debug)] +pub struct DnsDiff<'a> { + left: &'a DnsRecords, + right: &'a DnsRecords, +} + +impl<'a> DnsDiff<'a> { + /// Compare the DNS records contained in two sets of DNS configuration + /// + /// Both configurations are expected to contain exactly one zone and they + /// should have the same name. + pub fn new( + left: &'a DnsConfigParams, + right: &'a DnsConfigParams, + ) -> Result, anyhow::Error> { + let left_zone = left.sole_zone().context("left side of diff")?; + let right_zone = right.sole_zone().context("right side of diff")?; + + ensure!( + left_zone.zone_name == right_zone.zone_name, + "cannot compare DNS configuration from zones with different names: \ + {:?} vs. {:?}", left_zone.zone_name, right_zone.zone_name, + ); + + Ok(DnsDiff { left: &left_zone.records, right: &right_zone.records }) + } + + /// Iterate over the names that are present in the `right` config but + /// absent in the `left` one (i.e., added between `left` and `right`) + pub fn names_added(&self) -> impl Iterator { + self.right + .iter() + .filter(|(k, _)| !self.left.contains_key(*k)) + .map(|(k, v)| (k.as_ref(), v.as_ref())) + } + + /// Iterate over the names that are present in the `left` config but + /// absent in the `right` one (i.e., removed between `left` and `right`) + pub fn names_removed(&self) -> impl Iterator { + self.left + .iter() + .filter(|(k, _)| !self.right.contains_key(*k)) + .map(|(k, v)| (k.as_ref(), v.as_ref())) + } + + /// Iterate over the names whose records changed between `left` and `right`. + pub fn names_changed( + &self, + ) -> impl Iterator { + self.left.iter().filter_map(|(k, v1)| match self.right.get(k) { + Some(v2) if v1 != v2 => { + Some((k.as_ref(), v1.as_ref(), v2.as_ref())) + } + _ => None, + }) + } + + /// Returns true iff there are no differences in the DNS names and records + /// described by the given configurations + pub fn is_empty(&self) -> bool { + self.names_added().next().is_none() + && self.names_removed().next().is_none() + && self.names_changed().next().is_none() + } +} + +#[cfg(test)] +mod test { + use super::DnsDiff; + use crate::types::DnsConfigParams; + use crate::types::DnsConfigZone; + use crate::types::DnsRecord; + use chrono::Utc; + use std::collections::HashMap; + use std::net::Ipv4Addr; + + const ZONE_NAME: &str = "dummy"; + + fn example() -> DnsConfigParams { + DnsConfigParams { + generation: 4, + time_created: Utc::now(), + zones: vec![DnsConfigZone { + zone_name: ZONE_NAME.to_string(), + records: HashMap::from([ + ( + "ex1".to_string(), + vec![DnsRecord::A(Ipv4Addr::LOCALHOST)], + ), + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.3".parse().unwrap())], + ), + ]), + }], + } + } + + #[test] + fn diff_invalid() { + let example_empty = DnsConfigParams { + generation: 3, + time_created: Utc::now(), + zones: vec![], + }; + + // Configs must have at least one zone. 
+ let error = DnsDiff::new(&example_empty, &example_empty) + .expect_err("unexpectedly succeeded comparing two empty configs"); + assert!( + format!("{:#}", error).contains("expected exactly one DNS zone") + ); + + let example = example(); + let error = DnsDiff::new(&example_empty, &example) + .expect_err("unexpectedly succeeded comparing an empty config"); + assert!( + format!("{:#}", error).contains("expected exactly one DNS zone") + ); + + // Configs must not have more than one zone. + let example_multiple = DnsConfigParams { + generation: 3, + time_created: Utc::now(), + zones: vec![ + DnsConfigZone { + zone_name: ZONE_NAME.to_string(), + records: HashMap::new(), + }, + DnsConfigZone { + zone_name: "two".to_string(), + records: HashMap::new(), + }, + ], + }; + let error = DnsDiff::new(&example_multiple, &example).expect_err( + "unexpectedly succeeded comparing config with multiple zones", + ); + assert!( + format!("{:#}", error).contains("expected exactly one DNS zone") + ); + + // Cannot compare different zone names + let example_different_zone = DnsConfigParams { + generation: 3, + time_created: Utc::now(), + zones: vec![DnsConfigZone { + zone_name: format!("{}-other", ZONE_NAME), + records: HashMap::new(), + }], + }; + let error = DnsDiff::new(&example_different_zone, &example).expect_err( + "unexpectedly succeeded comparing configs with \ + different zone names", + ); + assert_eq!( + format!("{:#}", error), + "cannot compare DNS configuration from zones with different \ + names: \"dummy-other\" vs. \"dummy\"" + ); + } + + #[test] + fn diff_equivalent() { + let example = example(); + let diff = DnsDiff::new(&example, &example).unwrap(); + assert!(diff.is_empty()); + assert_eq!(diff.names_removed().count(), 0); + assert_eq!(diff.names_added().count(), 0); + assert_eq!(diff.names_changed().count(), 0); + } + + #[test] + fn diff_different() { + let example = example(); + let example2 = DnsConfigParams { + generation: 4, + time_created: Utc::now(), + zones: vec![DnsConfigZone { + zone_name: ZONE_NAME.to_string(), + records: HashMap::from([ + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.4".parse().unwrap())], + ), + ( + "ex3".to_string(), + vec![DnsRecord::A(std::net::Ipv4Addr::LOCALHOST)], + ), + ]), + }], + }; + + let diff = DnsDiff::new(&example, &example2).unwrap(); + assert!(!diff.is_empty()); + + let removed = diff.names_removed().collect::>(); + assert_eq!(removed.len(), 1); + assert_eq!(removed[0].0, "ex1"); + assert_eq!(removed[0].1, vec![DnsRecord::A(Ipv4Addr::LOCALHOST)]); + + let added = diff.names_added().collect::>(); + assert_eq!(added.len(), 1); + assert_eq!(added[0].0, "ex3"); + assert_eq!(added[0].1, vec![DnsRecord::A(Ipv4Addr::LOCALHOST)]); + + let changed = diff.names_changed().collect::>(); + assert_eq!(changed.len(), 1); + assert_eq!(changed[0].0, "ex2"); + assert_eq!( + changed[0].1, + vec![DnsRecord::A("192.168.1.3".parse().unwrap())] + ); + assert_eq!( + changed[0].2, + vec![DnsRecord::A("192.168.1.4".parse().unwrap())] + ); + } +} diff --git a/clients/dns-service-client/src/lib.rs b/clients/dns-service-client/src/lib.rs index 52c2b8bcd2..cd17a1559c 100644 --- a/clients/dns-service-client/src/lib.rs +++ b/clients/dns-service-client/src/lib.rs @@ -2,10 +2,17 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
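(For orientation, here is a minimal sketch of how a caller might drive the new DnsDiff helper once it holds two DNS configurations. The wrapper function below is illustrative only and is not part of this change.)

    use dns_service_client::types::DnsConfigParams;
    use dns_service_client::DnsDiff;

    // Print every name-level difference between two single-zone DNS configs.
    // DnsDiff::new() fails unless both configs hold exactly one zone and the
    // zone names match.
    fn print_dns_changes(
        left: &DnsConfigParams,
        right: &DnsConfigParams,
    ) -> anyhow::Result<()> {
        let diff = DnsDiff::new(left, right)?;
        if diff.is_empty() {
            println!("no DNS changes");
            return Ok(());
        }
        for (name, records) in diff.names_added() {
            println!("added   {}: {:?}", name, records);
        }
        for (name, records) in diff.names_removed() {
            println!("removed {}: {:?}", name, records);
        }
        for (name, old, new) in diff.names_changed() {
            println!("changed {}: {:?} -> {:?}", name, old, new);
        }
        Ok(())
    }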
+mod diff; + +use crate::Error as DnsConfigError; +use anyhow::ensure; +pub use diff::DnsDiff; +use std::collections::HashMap; + progenitor::generate_api!( spec = "../../openapi/dns-server.json", inner_type = slog::Logger, - derives = [schemars::JsonSchema, Eq, PartialEq], + derives = [schemars::JsonSchema, Clone, Eq, PartialEq], pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { slog::debug!(log, "client request"; "method" => %request.method(), @@ -22,8 +29,6 @@ pub const ERROR_CODE_UPDATE_IN_PROGRESS: &'static str = "UpdateInProgress"; pub const ERROR_CODE_BAD_UPDATE_GENERATION: &'static str = "BadUpdateGeneration"; -use crate::Error as DnsConfigError; - /// Returns whether an error from this client should be retried pub fn is_retryable(error: &DnsConfigError) -> bool { let response_value = match error { @@ -84,3 +89,23 @@ pub fn is_retryable(error: &DnsConfigError) -> bool { false } + +type DnsRecords = HashMap>; + +impl types::DnsConfigParams { + /// Given a high-level DNS configuration, return a reference to its sole + /// DNS zone. + /// + /// # Errors + /// + /// Returns an error if there are 0 or more than one zones in this + /// configuration. + pub fn sole_zone(&self) -> Result<&types::DnsConfigZone, anyhow::Error> { + ensure!( + self.zones.len() == 1, + "expected exactly one DNS zone, but found {}", + self.zones.len() + ); + Ok(&self.zones[0]) + } +} diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index cdc929d89b..17b4826f8c 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -679,6 +679,12 @@ impl From<&Generation> for i64 { } } +impl From for u64 { + fn from(g: Generation) -> Self { + g.0 + } +} + impl From for Generation { fn from(value: u32) -> Self { Generation(u64::from(value)) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 9904263067..aed7d86ba0 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -870,7 +870,7 @@ async fn cmd_nexus_blueprints_diff( let b2 = client.blueprint_view(&args.blueprint2_id).await.with_context( || format!("fetching blueprint {}", args.blueprint2_id), )?; - println!("{}", b1.diff(&b2)); + println!("{}", b1.diff_sleds(&b2)); Ok(()) } diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index bf1d9b763b..5eee34bd51 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -63,9 +63,9 @@ use crate::names::{ServiceName, DNS_ZONE}; use anyhow::{anyhow, ensure}; use dns_service_client::types::{DnsConfigParams, DnsConfigZone, DnsRecord}; -use omicron_common::api::internal::shared::SwitchLocation; +use omicron_common::api::external::Generation; use std::collections::BTreeMap; -use std::net::{Ipv6Addr, SocketAddrV6}; +use std::net::Ipv6Addr; use uuid::Uuid; /// Zones that can be referenced within the internal DNS system. 
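(The config.rs hunks that follow teach DnsConfigBuilder two higher-level helpers, host_zone_with_one_backend and host_zone_switch, plus a settable generation. As a rough sketch of how they compose: the zone IDs and addresses below are made up, and the standalone function is illustrative only.)

    use internal_dns::{DnsConfigBuilder, ServiceName};
    use omicron_common::address::{
        DENDRITE_PORT, MGD_PORT, MGS_PORT, NEXUS_INTERNAL_PORT,
    };
    use omicron_common::api::external::Generation;
    use uuid::Uuid;

    fn example_dns_config(
    ) -> anyhow::Result<dns_service_client::types::DnsConfigParams> {
        let mut builder = DnsConfigBuilder::new();

        // A Nexus zone exposing a single backend service record.
        builder.host_zone_with_one_backend(
            Uuid::new_v4(),
            "fd00:1122:3344:101::5".parse()?,
            ServiceName::Nexus,
            NEXUS_INTERNAL_PORT,
        )?;

        // A scrimlet's switch zone with its usual trio of services
        // (dendrite, MGS, mgd).
        builder.host_zone_switch(
            Uuid::new_v4(),
            "fd00:1122:3344:102::2".parse()?,
            DENDRITE_PORT,
            MGS_PORT,
            MGD_PORT,
        )?;

        // Stamp the config with the generation it should carry (the builder
        // defaults to Generation::new()).
        builder.generation(Generation::new().next());
        Ok(builder.build())
    }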
@@ -147,8 +147,6 @@ pub struct DnsConfigBuilder { /// network sleds: BTreeMap, - scrimlets: BTreeMap, - /// set of hosts of type "zone" that have been configured so far, mapping /// each zone's unique uuid to its sole IPv6 address on the control plane /// network @@ -163,6 +161,9 @@ pub struct DnsConfigBuilder { /// similar to service_instances_zones, but for services that run on sleds service_instances_sleds: BTreeMap>, + + /// generation number for this config + generation: Generation, } /// Describes a host of type "sled" in the control plane DNS zone @@ -192,16 +193,17 @@ impl DnsConfigBuilder { DnsConfigBuilder { sleds: BTreeMap::new(), zones: BTreeMap::new(), - scrimlets: BTreeMap::new(), service_instances_zones: BTreeMap::new(), service_instances_sleds: BTreeMap::new(), + generation: Generation::new(), } } /// Add a new host of type "sled" to the configuration /// - /// Returns a [`Sled`] that can be used with [`Self::service_backend_sled()`] to - /// specify that this sled is a backend for some higher-level service. + /// Returns a [`Sled`] that can be used with + /// [`Self::service_backend_sled()`] to specify that this sled is a backend + /// for some higher-level service. /// /// # Errors /// @@ -223,19 +225,11 @@ impl DnsConfigBuilder { } } - pub fn host_scrimlet( - &mut self, - switch_location: SwitchLocation, - addr: SocketAddrV6, - ) -> anyhow::Result<()> { - self.scrimlets.insert(switch_location, addr); - Ok(()) - } - /// Add a new dendrite host of type "zone" to the configuration /// - /// Returns a [`Zone`] that can be used with [`Self::service_backend_zone()`] to - /// specify that this zone is a backend for some higher-level service. + /// Returns a [`Zone`] that can be used with + /// [`Self::service_backend_zone()`] to specify that this zone is a backend + /// for some higher-level service. /// /// # Errors /// @@ -251,8 +245,9 @@ impl DnsConfigBuilder { /// Add a new host of type "zone" to the configuration /// - /// Returns a [`Zone`] that can be used with [`Self::service_backend_zone()`] to - /// specify that this zone is a backend for some higher-level service. + /// Returns a [`Zone`] that can be used with + /// [`Self::service_backend_zone()`] to specify that this zone is a backend + /// for some higher-level service. /// /// # Errors /// @@ -363,6 +358,52 @@ impl DnsConfigBuilder { } } + /// Higher-level shorthand for adding a zone with a single backend service + /// + /// # Errors + /// + /// This function fails only if the given zone has already been added to the + /// configuration. + pub fn host_zone_with_one_backend( + &mut self, + zone_id: Uuid, + addr: Ipv6Addr, + service: ServiceName, + port: u16, + ) -> anyhow::Result<()> { + let zone = self.host_zone(zone_id, addr)?; + self.service_backend_zone(service, &zone, port) + } + + /// Higher-level shorthand for adding a "switch" zone with its usual set of + /// backend services + /// + /// # Errors + /// + /// This function fails only if the given zone has already been added to the + /// configuration. 
+ pub fn host_zone_switch( + &mut self, + sled_id: Uuid, + switch_zone_ip: Ipv6Addr, + dendrite_port: u16, + mgs_port: u16, + mgd_port: u16, + ) -> anyhow::Result<()> { + let zone = self.host_dendrite(sled_id, switch_zone_ip)?; + self.service_backend_zone(ServiceName::Dendrite, &zone, dendrite_port)?; + self.service_backend_zone( + ServiceName::ManagementGatewayService, + &zone, + mgs_port, + )?; + self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port) + } + + pub fn generation(&mut self, generation: Generation) { + self.generation = generation; + } + /// Construct a complete [`DnsConfigParams`] (suitable for propagating to /// our DNS servers) for the control plane DNS zone described up to this /// point @@ -378,23 +419,6 @@ impl DnsConfigBuilder { (zone.dns_name(), vec![DnsRecord::Aaaa(zone_ip)]) }); - let scrimlet_srv_records = - self.scrimlets.clone().into_iter().map(|(location, addr)| { - let srv = DnsRecord::Srv(dns_service_client::types::Srv { - prio: 0, - weight: 0, - port: addr.port(), - target: format!("{location}.scrimlet.{}", DNS_ZONE), - }); - (ServiceName::Scrimlet(location).dns_name(), vec![srv]) - }); - - let scrimlet_aaaa_records = - self.scrimlets.into_iter().map(|(location, addr)| { - let aaaa = DnsRecord::Aaaa(*addr.ip()); - (format!("{location}.scrimlet"), vec![aaaa]) - }); - // Assemble the set of SRV records, which implicitly point back at // zones' AAAA records. let srv_records_zones = self.service_instances_zones.into_iter().map( @@ -439,12 +463,10 @@ impl DnsConfigBuilder { .chain(zone_records) .chain(srv_records_sleds) .chain(srv_records_zones) - .chain(scrimlet_aaaa_records) - .chain(scrimlet_srv_records) .collect(); DnsConfigParams { - generation: 1, + generation: u64::from(self.generation), time_created: chrono::Utc::now(), zones: vec![DnsConfigZone { zone_name: DNS_ZONE.to_owned(), diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index bffe6e829a..8cafe4ac97 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -4,7 +4,6 @@ //! Well-known DNS names and related types for internal DNS (see RFD 248) -use omicron_common::api::internal::shared::SwitchLocation; use uuid::Uuid; /// Name for the control plane DNS zone @@ -35,7 +34,6 @@ pub enum ServiceName { InternalNtp, Maghemite, //TODO change to Dpd - maghemite has several services. Mgd, - Scrimlet(SwitchLocation), } impl ServiceName { @@ -59,7 +57,6 @@ impl ServiceName { ServiceName::InternalNtp => "internal-ntp", ServiceName::Maghemite => "maghemite", ServiceName::Mgd => "mgd", - ServiceName::Scrimlet(_) => "scrimlet", } } @@ -91,9 +88,6 @@ impl ServiceName { ServiceName::Crucible(id) => { format!("_{}._tcp.{}", self.service_kind(), id) } - ServiceName::Scrimlet(location) => { - format!("_{location}._scrimlet._tcp") - } } } diff --git a/nexus/blueprint-execution/Cargo.toml b/nexus/blueprint-execution/Cargo.toml index 11d8003599..3284bda27e 100644 --- a/nexus/blueprint-execution/Cargo.toml +++ b/nexus/blueprint-execution/Cargo.toml @@ -8,12 +8,17 @@ omicron-rpaths.workspace = true [dependencies] anyhow.workspace = true +dns-service-client.workspace = true futures.workspace = true +internal-dns.workspace = true +nexus-db-model.workspace = true nexus-db-queries.workspace = true nexus-types.workspace = true +omicron-common.workspace = true reqwest.workspace = true sled-agent-client.workspace = true slog.workspace = true +slog-error-chain.workspace = true uuid.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
This is needed @@ -26,9 +31,11 @@ omicron-workspace-hack.workspace = true [dev-dependencies] chrono.workspace = true httptest.workspace = true -nexus-db-model.workspace = true +ipnet.workspace = true +nexus-deployment.workspace = true +nexus-inventory.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true -omicron-common.workspace = true omicron-nexus.workspace = true +omicron-test-utils.workspace = true tokio.workspace = true diff --git a/nexus/blueprint-execution/src/dns.rs b/nexus/blueprint-execution/src/dns.rs new file mode 100644 index 0000000000..6dd9266f32 --- /dev/null +++ b/nexus/blueprint-execution/src/dns.rs @@ -0,0 +1,745 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Propagates internal DNS changes in a given blueprint + +use crate::Sled; +use dns_service_client::DnsDiff; +use internal_dns::DnsConfigBuilder; +use internal_dns::ServiceName; +use nexus_db_model::DnsGroup; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::OmicronZoneType; +use nexus_types::internal_api::params::DnsConfigParams; +use omicron_common::address::get_switch_zone_address; +use omicron_common::address::CLICKHOUSE_KEEPER_PORT; +use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::COCKROACH_PORT; +use omicron_common::address::CRUCIBLE_PANTRY_PORT; +use omicron_common::address::CRUCIBLE_PORT; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::DNS_HTTP_PORT; +use omicron_common::address::MGD_PORT; +use omicron_common::address::MGS_PORT; +use omicron_common::address::NEXUS_INTERNAL_PORT; +use omicron_common::address::NTP_PORT; +use omicron_common::address::OXIMETER_PORT; +use omicron_common::api::external::Error; +use omicron_common::api::external::Generation; +use omicron_common::api::external::InternalContext; +use slog::{debug, info, o}; +use std::collections::BTreeMap; +use uuid::Uuid; + +pub(crate) async fn deploy_dns( + opctx: &OpContext, + datastore: &DataStore, + creator: String, + blueprint: &Blueprint, + sleds_by_id: &BTreeMap, +) -> Result<(), Error> { + // First, fetch the current DNS config. + let dns_config_current = datastore + .dns_config_read(opctx, DnsGroup::Internal) + .await + .internal_context("reading current DNS")?; + + // We could check here that the DNS version we found isn't newer than when + // the blueprint was generated. But we have to check later when we try to + // update the database anyway. And we're not wasting much effort allowing + // this proceed for now. This way, we have only one code path for this and + // we know it's being hit when we exercise this condition. + + // Next, construct the DNS config represented by the blueprint. + let dns_config_blueprint = blueprint_dns_config(blueprint, sleds_by_id); + + // Looking at the current contents of DNS, prepare an update that will make + // it match what it should be. + let log = opctx.log.new(o!("blueprint_execution" => "DNS")); + let comment = format!("blueprint {} ({})", blueprint.id, blueprint.comment); + let maybe_update = dns_compute_update( + &log, + comment, + creator, + &dns_config_current, + &dns_config_blueprint, + )?; + let Some(update) = maybe_update else { + // Nothing to do. 
+ return Ok(()); + }; + + // Our goal here is to update the DNS configuration stored in the database + // to match the blueprint. But it's always possible that we're executing a + // blueprint that's no longer the current target. In that case, we want to + // fail without making any changes. We definitely don't want to + // accidentally clobber changes that have been made by another instance + // executing a newer target blueprint. + // + // To avoid this problem, before generating a blueprint, Nexus fetches the + // current internal DNS generation and stores that into the blueprint + // itself. Here, when we execute the blueprint, we make our database update + // conditional on that still being the current internal DNS generation. + // If some other instance has already come along and updated the database, + // whether for this same blueprint or a newer one, our attempt to update the + // database will fail. + // + // Let's look at a tricky example. Suppose: + // + // 1. The system starts with some initial blueprint B1 with DNS version 3. + // The blueprint has been fully executed and all is well. + // + // 2. Blueprint B2 gets generated. It stores DNS version 3. It's made the + // current target. Execution has not started yet. + // + // 3. Blueprint B3 gets generated. It also stores DNS version 3 because + // that's still the current version in DNS. B3 is made the current + // target. + // + // Assume B2 and B3 specify different internal DNS contents (e.g., have a + // different set of Omicron zones in them). + // + // 4. Nexus instance N1 finds B2 to be the current target and starts + // executing it. (Assume it found this between 2 and 3 above.) + // + // 5. Nexus instance N2 finds B3 to be the current target and starts + // executing it. + // + // During execution: + // + // * N1 will assemble a new version of DNS called version 4, generate a diff + // between version 3 (which is fixed) and version 4, and attempt to apply + // this to the database conditional on the current version being version + // 3. + // + // * N2 will do the same, but its version 4 will look different. + // + // Now, one of two things could happen: + // + // 1. N1 wins. Its database update applies successfully. In the database, + // the internal DNS version becomes version 4. In this case, N2 loses. + // Its database operation fails altogether. At this point, any + // subsequent attempt to execute blueprint B3 will fail because any DNS + // update will be conditional on the database having version 3. The only + // way out of this is for the planner to generate a new blueprint B4 + // that's exactly equivalent to B3 except that the stored internal DNS + // version is 4. Then we'll be able to execute that. + // + // 2. N2 wins. Its database update applies successfully. In the database, + // the internal DNS version becomes version 4. In this case, N1 loses. + // Its database operation fails altogether. At this point, any + // subsequent attempt to execute blueprint B3 will fail because any DNS + // update will be conditional on the databae having version 3. No + // further action is needed, though, because we've successfully executed + // the latest target blueprint. + // + // In both cases, the system will (1) converge to having successfully + // executed the target blueprint, and (2) never have rolled any changes back + // -- DNS only ever moves forward, closer to the latest desired state. 
+ info!( + log, + "attempting to update from generation {} to generation {}", + dns_config_current.generation, + dns_config_blueprint.generation, + ); + let generation_u32 = + u32::try_from(dns_config_current.generation).map_err(|e| { + Error::internal_error(&format!( + "internal DNS generation got too large: {}", + e, + )) + })?; + let generation = + nexus_db_model::Generation::from(Generation::from(generation_u32)); + datastore.dns_update_from_version(opctx, update, generation).await +} + +/// Returns the expected contents of internal DNS based on the given blueprint +pub fn blueprint_dns_config( + blueprint: &Blueprint, + sleds_by_id: &BTreeMap, +) -> DnsConfigParams { + // The DNS names configured here should match what RSS configures for the + // same zones. It's tricky to have RSS share the same code because it uses + // Sled Agent's _internal_ `OmicronZoneConfig` (and friends), whereas we're + // using `sled-agent-client`'s version of that type. However, the + // DnsConfigBuilder's interface is high-level enough that it handles most of + // the details. + let mut dns_builder = DnsConfigBuilder::new(); + + // The code below assumes that all zones are using the default port numbers. + // That should be true, as those are the only ports ever used today. + // In an ideal world, the correct port would be pulled out of the + // `OmicronZoneType` variant instead. Although that information is present, + // it's irritatingly non-trivial to do right now because SocketAddrs are + // represented as strings, so we'd need to parse all of them and handle all + // the errors, even though they should never happen. + // See oxidecomputer/omicron#4988. + for (_, omicron_zone) in blueprint.all_omicron_zones() { + if !blueprint.zones_in_service.contains(&omicron_zone.id) { + continue; + } + + let (service_name, port) = match omicron_zone.zone_type { + OmicronZoneType::BoundaryNtp { .. } => { + (ServiceName::BoundaryNtp, NTP_PORT) + } + OmicronZoneType::InternalNtp { .. } => { + (ServiceName::InternalNtp, NTP_PORT) + } + OmicronZoneType::Clickhouse { .. } => { + (ServiceName::Clickhouse, CLICKHOUSE_PORT) + } + OmicronZoneType::ClickhouseKeeper { .. } => { + (ServiceName::ClickhouseKeeper, CLICKHOUSE_KEEPER_PORT) + } + OmicronZoneType::CockroachDb { .. } => { + (ServiceName::Cockroach, COCKROACH_PORT) + } + OmicronZoneType::Nexus { .. } => { + (ServiceName::Nexus, NEXUS_INTERNAL_PORT) + } + OmicronZoneType::Crucible { .. } => { + (ServiceName::Crucible(omicron_zone.id), CRUCIBLE_PORT) + } + OmicronZoneType::CruciblePantry { .. } => { + (ServiceName::CruciblePantry, CRUCIBLE_PANTRY_PORT) + } + OmicronZoneType::Oximeter { .. } => { + (ServiceName::Oximeter, OXIMETER_PORT) + } + OmicronZoneType::ExternalDns { .. } => { + (ServiceName::ExternalDns, DNS_HTTP_PORT) + } + OmicronZoneType::InternalDns { .. } => { + (ServiceName::InternalDns, DNS_HTTP_PORT) + } + }; + + // This unwrap is safe because this function only fails if we provide + // the same zone id twice, which should not be possible here. + dns_builder + .host_zone_with_one_backend( + omicron_zone.id, + omicron_zone.underlay_address, + service_name, + port, + ) + .unwrap(); + } + + let scrimlets = sleds_by_id.values().filter(|sled| sled.is_scrimlet); + for scrimlet in scrimlets { + let sled_subnet = scrimlet.subnet(); + let switch_zone_ip = get_switch_zone_address(sled_subnet); + // unwrap(): see above. 
+ dns_builder + .host_zone_switch( + scrimlet.id, + switch_zone_ip, + DENDRITE_PORT, + MGS_PORT, + MGD_PORT, + ) + .unwrap(); + } + + // We set the generation number for the internal DNS to be newer than + // whatever it was when this blueprint was generated. This will only be + // used if the generated DNS contents are different from what's current. + dns_builder.generation(blueprint.internal_dns_version.next()); + dns_builder.build() +} + +fn dns_compute_update( + log: &slog::Logger, + comment: String, + creator: String, + current_config: &DnsConfigParams, + new_config: &DnsConfigParams, +) -> Result, Error> { + let mut update = + DnsVersionUpdateBuilder::new(DnsGroup::Internal, comment, creator); + + let diff = DnsDiff::new(¤t_config, &new_config) + .map_err(|e| Error::internal_error(&format!("{:#}", e)))?; + if diff.is_empty() { + info!(log, "no changes"); + return Ok(None); + } + + for (name, new_records) in diff.names_added() { + debug!( + log, + "adding name"; + "name" => name, + "new_records" => ?new_records, + ); + update.add_name( + name.to_string(), + new_records.into_iter().cloned().collect(), + )?; + } + + for (name, old_records) in diff.names_removed() { + debug!( + log, + "removing name"; + "name" => name, + "old_records" => ?old_records, + ); + update.remove_name(name.to_string())?; + } + + for (name, old_records, new_records) in diff.names_changed() { + debug!( + log, + "updating name"; + "name" => name, + "old_records" => ?old_records, + "new_records" => ?new_records, + ); + update.remove_name(name.to_string())?; + update.add_name( + name.to_string(), + new_records.into_iter().cloned().collect(), + )?; + } + + Ok(Some(update)) +} + +#[cfg(test)] +mod test { + use super::blueprint_dns_config; + use super::dns_compute_update; + use crate::Sled; + use internal_dns::ServiceName; + use internal_dns::DNS_ZONE; + use nexus_deployment::blueprint_builder::BlueprintBuilder; + use nexus_inventory::CollectionBuilder; + use nexus_types::deployment::Blueprint; + use nexus_types::deployment::OmicronZoneConfig; + use nexus_types::deployment::OmicronZoneType; + use nexus_types::deployment::Policy; + use nexus_types::deployment::SledResources; + use nexus_types::deployment::ZpoolName; + use nexus_types::external_api::views::SledProvisionState; + use nexus_types::internal_api::params::DnsConfigParams; + use nexus_types::internal_api::params::DnsConfigZone; + use nexus_types::internal_api::params::DnsRecord; + use omicron_common::address::get_sled_address; + use omicron_common::address::get_switch_zone_address; + use omicron_common::address::Ipv6Subnet; + use omicron_common::address::RACK_PREFIX; + use omicron_common::address::SLED_PREFIX; + use omicron_common::api::external::Generation; + use omicron_test_utils::dev::test_setup_log; + use std::collections::BTreeMap; + use std::collections::BTreeSet; + use std::collections::HashMap; + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + use std::net::SocketAddrV6; + use std::str::FromStr; + use uuid::Uuid; + + fn blueprint_empty() -> Blueprint { + let builder = CollectionBuilder::new("test-suite"); + let collection = builder.build(); + let policy = Policy { + sleds: BTreeMap::new(), + service_ip_pool_ranges: vec![], + target_nexus_zone_count: 3, + }; + BlueprintBuilder::build_initial_from_collection( + &collection, + Generation::new(), + &policy, + "test-suite", + ) + .expect("failed to generate empty blueprint") + } + + fn dns_config_empty() -> DnsConfigParams { + DnsConfigParams { + generation: 1, + time_created: chrono::Utc::now(), + 
zones: vec![DnsConfigZone { + zone_name: String::from("internal"), + records: HashMap::new(), + }], + } + } + + /// test blueprint_dns_config(): trivial case of an empty blueprint + #[test] + fn test_blueprint_dns_empty() { + let blueprint = blueprint_empty(); + let blueprint_dns = blueprint_dns_config(&blueprint, &BTreeMap::new()); + assert!(blueprint_dns.sole_zone().unwrap().records.is_empty()); + } + + /// test blueprint_dns_config(): exercise various different conditions + /// - one of each type of zone in service + /// - some zones not in service + #[test] + fn test_blueprint_dns_basic() { + // We'll use the standard representative inventory collection to build a + // blueprint. The main thing we care about here is that we have at + // least one zone of each type. Later, we'll mark a couple of the sleds + // as Scrimlets to exercise that case. + let representative = nexus_inventory::examples::representative(); + let collection = representative.builder.build(); + let rack_subnet_base: Ipv6Addr = + "fd00:1122:3344:0100::".parse().unwrap(); + let rack_subnet = + ipnet::Ipv6Net::new(rack_subnet_base, RACK_PREFIX).unwrap(); + let possible_sled_subnets = rack_subnet.subnets(SLED_PREFIX).unwrap(); + // Ignore sleds with no associated zones in the inventory. + // This is included in the "representative" collection, but it's + // not allowed by BlueprintBuilder::build_initial_from_collection(). + let policy_sleds = collection + .omicron_zones + .keys() + .zip(possible_sled_subnets) + .map(|(sled_id, subnet)| { + let sled_resources = SledResources { + provision_state: SledProvisionState::Provisionable, + zpools: BTreeSet::from([ZpoolName::from_str(&format!( + "oxp_{}", + Uuid::new_v4() + )) + .unwrap()]), + subnet: Ipv6Subnet::new(subnet.network()), + }; + (*sled_id, sled_resources) + }) + .collect(); + + let policy = Policy { + sleds: policy_sleds, + service_ip_pool_ranges: vec![], + target_nexus_zone_count: 3, + }; + let dns_empty = dns_config_empty(); + let initial_dns_generation = + Generation::from(u32::try_from(dns_empty.generation).unwrap()); + let mut blueprint = BlueprintBuilder::build_initial_from_collection( + &collection, + initial_dns_generation, + &policy, + "test-suite", + ) + .expect("failed to build initial blueprint"); + + // To make things slightly more interesting, let's add a zone that's + // not currently in service. + let out_of_service_id = Uuid::new_v4(); + let out_of_service_addr = Ipv6Addr::LOCALHOST; + blueprint.omicron_zones.values_mut().next().unwrap().zones.push( + OmicronZoneConfig { + id: out_of_service_id, + underlay_address: out_of_service_addr, + zone_type: OmicronZoneType::Oximeter { + address: SocketAddrV6::new( + out_of_service_addr, + 12345, + 0, + 0, + ) + .to_string(), + }, + }, + ); + + // To generate the blueprint's DNS config, we need to make up a + // different set of information about the sleds in our fake system. + let sleds_by_id = policy + .sleds + .iter() + .enumerate() + .map(|(i, (sled_id, sled_resources))| { + let sled_info = Sled { + id: *sled_id, + sled_agent_address: get_sled_address(sled_resources.subnet), + // The first two of these (arbitrarily) will be marked + // Scrimlets. 
+ is_scrimlet: i < 2, + }; + (*sled_id, sled_info) + }) + .collect(); + + let dns_config_blueprint = + blueprint_dns_config(&blueprint, &sleds_by_id); + assert_eq!( + dns_config_blueprint.generation, + u64::from(initial_dns_generation.next()) + ); + let blueprint_dns_zone = dns_config_blueprint.sole_zone().unwrap(); + assert_eq!(blueprint_dns_zone.zone_name, DNS_ZONE); + + // Now, verify a few different properties about the generated DNS + // configuration: + // + // 1. Every zone (except for the one that we added not-in-service) + // should have some DNS name with a AAAA record that points at the + // zone's underlay IP. (i.e., every Omiron zone is _in_ DNS) + // + // 2. Every SRV record that we find should have a "target" that points + // to another name within the DNS configuration, and that name should + // be one of the ones with a AAAA record pointing to an Omicron zone. + // + // 3. There is at least one SRV record for each service that we expect + // to appear in the representative system that we're working with. + // + // 4. Our out-of-service zone does *not* appear in the DNS config, + // neither with an AAAA record nor in an SRV record. + // + // Together, this tells us that we have SRV records for all services, + // that those SRV records all point to at least one of the Omicron zones + // for that service, and that we correctly ignored zones that were not + // in service. + + // To start, we need a mapping from underlay IP to the corresponding + // Omicron zone. + let mut omicron_zones_by_ip: BTreeMap<_, _> = blueprint + .all_omicron_zones() + .filter(|(_, zone)| zone.id != out_of_service_id) + .map(|(_, zone)| (zone.underlay_address, zone.id)) + .collect(); + println!("omicron zones by IP: {:#?}", omicron_zones_by_ip); + + // We also want a mapping from underlay IP to the corresponding switch + // zone. In this case, the value is the Scrimlet's sled id. + let mut switch_sleds_by_ip: BTreeMap<_, _> = sleds_by_id + .iter() + .filter_map(|(sled_id, sled)| { + if sled.is_scrimlet { + let sled_subnet = policy.sleds.get(sled_id).unwrap().subnet; + let switch_zone_ip = get_switch_zone_address(sled_subnet); + Some((switch_zone_ip, *sled_id)) + } else { + None + } + }) + .collect(); + + // Now go through all the DNS names that have AAAA records and remove + // any corresponding Omicron zone. While doing this, construct a set of + // the fully-qualified DNS names (i.e., with the zone name suffix + // appended) that had AAAA records. We'll use this later to make sure + // all the SRV records' targets that we find are valid. 
+ let mut expected_srv_targets: BTreeSet<_> = BTreeSet::new(); + for (name, records) in &blueprint_dns_zone.records { + let addrs: Vec<_> = records + .iter() + .filter_map(|dns_record| match dns_record { + DnsRecord::Aaaa(addr) => Some(addr), + _ => None, + }) + .collect(); + for addr in addrs { + if let Some(zone_id) = omicron_zones_by_ip.remove(addr) { + println!( + "IP {} found in DNS corresponds with zone {}", + addr, zone_id + ); + expected_srv_targets.insert(format!( + "{}.{}", + name, blueprint_dns_zone.zone_name + )); + continue; + } + + if let Some(scrimlet_id) = switch_sleds_by_ip.remove(addr) { + println!( + "IP {} found in DNS corresponds with switch zone \ + for Scrimlet {}", + addr, scrimlet_id + ); + expected_srv_targets.insert(format!( + "{}.{}", + name, blueprint_dns_zone.zone_name + )); + continue; + } + + println!( + "note: found IP ({}) not corresponding to any \ + Omicron zone or switch zone (name {:?})", + addr, name + ); + } + } + + println!( + "Omicron zones whose IPs were not found in DNS: {:?}", + omicron_zones_by_ip, + ); + assert!( + omicron_zones_by_ip.is_empty(), + "some Omicron zones' IPs were not found in DNS" + ); + + println!( + "Scrimlets whose switch zone IPs were not found in DNS: {:?}", + switch_sleds_by_ip, + ); + assert!( + switch_sleds_by_ip.is_empty(), + "some switch zones' IPs were not found in DNS" + ); + + // Now go through all DNS names that have SRV records. For each one, + // + // 1. If its name corresponds to the name of one of the SRV services + // that we expect the system to have, record that fact. At the end + // we'll verify that we found at least one SRV record for each such + // service. + // + // 2. Make sure that the SRV record points at a name that we found in + // the previous pass (i.e., that corresponds to an Omicron zone). + // + // There are some ServiceNames missing here because they are not part of + // our representative config (e.g., ClickhouseKeeper) or they don't + // currently have DNS record at all (e.g., SledAgent, Maghemite, Mgd, + // Tfport). + let mut srv_kinds_expected = BTreeSet::from([ + ServiceName::Clickhouse, + ServiceName::Cockroach, + ServiceName::InternalDns, + ServiceName::ExternalDns, + ServiceName::Nexus, + ServiceName::Oximeter, + ServiceName::Dendrite, + ServiceName::CruciblePantry, + ServiceName::BoundaryNtp, + ServiceName::InternalNtp, + ]); + + for (name, records) in &blueprint_dns_zone.records { + let srvs: Vec<_> = records + .iter() + .filter_map(|dns_record| match dns_record { + DnsRecord::Srv(srv) => Some(srv), + _ => None, + }) + .collect(); + for srv in srvs { + assert!( + expected_srv_targets.contains(&srv.target), + "found SRV record with target {:?} that does not \ + correspond to a name that points to any Omicron zone", + srv.target + ); + } + + let kinds_left: Vec<_> = + srv_kinds_expected.iter().copied().collect(); + for kind in kinds_left { + if kind.dns_name() == *name { + srv_kinds_expected.remove(&kind); + } + } + } + + println!("SRV kinds with no records found: {:?}", srv_kinds_expected); + assert!(srv_kinds_expected.is_empty()); + } + + #[test] + fn test_dns_compute_update() { + let logctx = test_setup_log("dns_compute_update"); + + // Start with an empty DNS config. There's no database update needed + // when updating the DNS config to itself. 
+ let dns_empty = dns_config_empty(); + match dns_compute_update( + &logctx.log, + "test-suite".to_string(), + "test-suite".to_string(), + &dns_empty, + &dns_empty, + ) { + Ok(None) => (), + Err(error) => { + panic!("unexpected error generating update: {:?}", error) + } + Ok(Some(diff)) => panic!("unexpected delta: {:?}", diff), + }; + + // Now let's do something a little less trivial. Set up two slightly + // different DNS configurations, compute the database update, and make + // sure it matches what we expect. + let dns_config1 = DnsConfigParams { + generation: 4, + time_created: chrono::Utc::now(), + zones: vec![DnsConfigZone { + zone_name: "my-zone".to_string(), + records: HashMap::from([ + ( + "ex1".to_string(), + vec![DnsRecord::A(Ipv4Addr::LOCALHOST)], + ), + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.3".parse().unwrap())], + ), + ]), + }], + }; + + let dns_config2 = DnsConfigParams { + generation: 4, + time_created: chrono::Utc::now(), + zones: vec![DnsConfigZone { + zone_name: "my-zone".to_string(), + records: HashMap::from([ + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.4".parse().unwrap())], + ), + ( + "ex3".to_string(), + vec![DnsRecord::A(Ipv4Addr::LOCALHOST)], + ), + ]), + }], + }; + + let update = dns_compute_update( + &logctx.log, + "test-suite".to_string(), + "test-suite".to_string(), + &dns_config1, + &dns_config2, + ) + .expect("failed to compute update") + .expect("unexpectedly produced no update"); + + let mut removed: Vec<_> = update.names_removed().collect(); + removed.sort(); + assert_eq!(removed, vec!["ex1", "ex2"]); + + let mut added: Vec<_> = update.names_added().collect(); + added.sort_by_key(|n| n.0); + assert_eq!( + added, + vec![ + ( + "ex2", + [DnsRecord::A("192.168.1.4".parse().unwrap())].as_ref() + ), + ("ex3", [DnsRecord::A(Ipv4Addr::LOCALHOST)].as_ref()), + ] + ); + + logctx.cleanup_successful(); + } +} diff --git a/nexus/blueprint-execution/src/lib.rs b/nexus/blueprint-execution/src/lib.rs index f7bfd7d30c..a13acdf265 100644 --- a/nexus/blueprint-execution/src/lib.rs +++ b/nexus/blueprint-execution/src/lib.rs @@ -6,29 +6,87 @@ //! //! See `nexus_deployment` crate-level docs for background. +use anyhow::{anyhow, Context}; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; -use slog::o; +use nexus_types::identity::Asset; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use slog::info; +use slog_error_chain::InlineErrorChain; +use std::collections::BTreeMap; +use std::net::SocketAddrV6; +use uuid::Uuid; +mod dns; mod omicron_zones; +struct Sled { + id: Uuid, + sled_agent_address: SocketAddrV6, + is_scrimlet: bool, +} + +impl Sled { + pub fn subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(*self.sled_agent_address.ip()) + } +} + +impl From for Sled { + fn from(value: nexus_db_model::Sled) -> Self { + Sled { + id: value.id(), + sled_agent_address: value.address(), + is_scrimlet: value.is_scrimlet(), + } + } +} + /// Make one attempt to realize the given blueprint, meaning to take actions to /// alter the real system to match the blueprint /// /// The assumption is that callers are running this periodically or in a loop to /// deal with transient errors or changes in the underlying system state. 
-pub async fn realize_blueprint( +pub async fn realize_blueprint( opctx: &OpContext, datastore: &DataStore, blueprint: &Blueprint, -) -> Result<(), Vec> { - let log = opctx.log.new(o!("comment" => blueprint.comment.clone())); - omicron_zones::deploy_zones( - &log, - opctx, - datastore, - &blueprint.omicron_zones, + nexus_label: S, +) -> Result<(), Vec> +where + String: From, +{ + let opctx = opctx.child(BTreeMap::from([( + "comment".to_string(), + blueprint.comment.clone(), + )])); + + info!( + opctx.log, + "attempting to realize blueprint"; + "blueprint_id" => ?blueprint.id + ); + + let sleds_by_id: BTreeMap = datastore + .sled_list_all_batched(&opctx) + .await + .context("listing all sleds") + .map_err(|e| vec![e])? + .into_iter() + .map(|db_sled| (db_sled.id(), Sled::from(db_sled))) + .collect(); + omicron_zones::deploy_zones(&opctx, &sleds_by_id, &blueprint.omicron_zones) + .await?; + + dns::deploy_dns( + &opctx, + &datastore, + String::from(nexus_label), + &blueprint, + &sleds_by_id, ) .await + .map_err(|e| vec![anyhow!("{}", InlineErrorChain::new(&e))]) } diff --git a/nexus/blueprint-execution/src/omicron_zones.rs b/nexus/blueprint-execution/src/omicron_zones.rs index f3e81d283d..1d5c4444b1 100644 --- a/nexus/blueprint-execution/src/omicron_zones.rs +++ b/nexus/blueprint-execution/src/omicron_zones.rs @@ -4,50 +4,49 @@ //! Manges deployment of Omicron zones to Sled Agents +use crate::Sled; +use anyhow::anyhow; use anyhow::Context; use futures::stream; use futures::StreamExt; use nexus_db_queries::context::OpContext; -use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::db::DataStore; use nexus_types::deployment::OmicronZonesConfig; use sled_agent_client::Client as SledAgentClient; use slog::info; use slog::warn; -use slog::Logger; use std::collections::BTreeMap; use uuid::Uuid; /// Idempotently ensure that the specified Omicron zones are deployed to the /// corresponding sleds pub(crate) async fn deploy_zones( - log: &Logger, opctx: &OpContext, - datastore: &DataStore, + sleds_by_id: &BTreeMap, zones: &BTreeMap, ) -> Result<(), Vec> { let errors: Vec<_> = stream::iter(zones) .filter_map(|(sled_id, config)| async move { - let client = match sled_client(opctx, datastore, *sled_id).await { - Ok(client) => client, - Err(err) => { - warn!(log, "{err:#}"); + let db_sled = match sleds_by_id.get(sled_id) { + Some(sled) => sled, + None => { + let err = anyhow!("sled not found in db list: {}", sled_id); + warn!(opctx.log, "{err:#}"); return Some(err); } }; + let client = sled_client(opctx, &db_sled); let result = client.omicron_zones_put(&config).await.with_context(|| { format!("Failed to put {config:#?} to sled {sled_id}") }); - match result { Err(error) => { - warn!(log, "{error:#}"); + warn!(opctx.log, "{error:#}"); Some(error) } Ok(_) => { info!( - log, + opctx.log, "Successfully deployed zones for sled agent"; "sled_id" => %sled_id, "generation" => %config.generation, @@ -71,43 +70,27 @@ pub(crate) async fn deploy_zones( // method on the `Nexus` type. We want to have a more constrained type we can // pass into background tasks for this type of functionality, but for now we // just copy the functionality. 
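(Stepping back to the new realize_blueprint entry point defined earlier in this file: a rough sketch of how a periodic caller, such as the blueprint_execution background task, might drive it. The wrapper function and label string are illustrative, not the actual task code, and the crate path is assumed from the nexus/blueprint-execution package in this diff.)

    use nexus_db_queries::context::OpContext;
    use nexus_db_queries::db::DataStore;
    use nexus_types::deployment::Blueprint;

    // Make one attempt to realize a blueprint and log per-step errors rather
    // than propagating them; callers are expected to retry periodically.
    async fn try_realize_once(
        opctx: &OpContext,
        datastore: &DataStore,
        blueprint: &Blueprint,
    ) {
        // Crate name assumed from the nexus/blueprint-execution package in
        // this diff. The label identifies which Nexus created any resulting
        // DNS update; any type convertible into String is accepted.
        match nexus_blueprint_execution::realize_blueprint(
            opctx,
            datastore,
            blueprint,
            "demo-nexus",
        )
        .await
        {
            Ok(()) => slog::info!(opctx.log, "blueprint realized"),
            Err(errors) => {
                for error in errors {
                    slog::warn!(
                        opctx.log,
                        "blueprint execution error: {:#}",
                        error
                    );
                }
            }
        }
    }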
-async fn sled_client( - opctx: &OpContext, - datastore: &DataStore, - sled_id: Uuid, -) -> Result { - let (.., sled) = LookupPath::new(opctx, datastore) - .sled_id(sled_id) - .fetch() - .await - .with_context(|| { - format!( - "Failed to create sled_agent::Client for sled_id: {}", - sled_id - ) - })?; +fn sled_client(opctx: &OpContext, sled: &Sled) -> SledAgentClient { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) .build() .unwrap(); - Ok(SledAgentClient::new_with_client( - &format!("http://{}", sled.address()), + SledAgentClient::new_with_client( + &format!("http://{}", sled.sled_agent_address), client, opctx.log.clone(), - )) + ) } #[cfg(test)] mod test { use super::deploy_zones; + use crate::Sled; use httptest::matchers::{all_of, json_decoded, request}; use httptest::responders::status_code; use httptest::Expectation; - use nexus_db_model::{ - ByteCount, SledBaseboard, SledSystemHardware, SledUpdate, - }; use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::OmicronZonesConfig; @@ -140,6 +123,7 @@ mod test { omicron_zones, zones_in_service: BTreeSet::new(), parent_blueprint_id: None, + internal_dns_version: Generation::new(), time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), @@ -155,14 +139,6 @@ mod test { cptestctx.logctx.log.clone(), datastore.clone(), ); - let log = opctx.log.clone(); - - // Get a success result back when the blueprint has an empty set of - // zones. - let blueprint = Arc::new(create_blueprint(BTreeMap::new())); - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) - .await - .expect("failed to deploy no zones"); // Create some fake sled-agent servers to respond to zone puts and add // sleds to CRDB. @@ -170,67 +146,61 @@ mod test { let mut s2 = httptest::Server::run(); let sled_id1 = Uuid::new_v4(); let sled_id2 = Uuid::new_v4(); - let rack_id = Uuid::new_v4(); - for (i, (sled_id, server)) in - [(sled_id1, &s1), (sled_id2, &s2)].iter().enumerate() - { - let SocketAddr::V6(addr) = server.addr() else { - panic!("Expected Ipv6 address. Got {}", server.addr()); - }; - let update = SledUpdate::new( - *sled_id, - addr, - SledBaseboard { - serial_number: i.to_string(), - part_number: "test".into(), - revision: 1, - }, - SledSystemHardware { - is_scrimlet: false, - usable_hardware_threads: 4, - usable_physical_ram: ByteCount(1000.into()), - reservoir_size: ByteCount(999.into()), - }, - rack_id, - ); - datastore - .sled_upsert(update) - .await - .expect("Failed to insert sled to db"); - } - - // The particular dataset doesn't matter for this test. - // We re-use the same one to not obfuscate things - let dataset = OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()).parse().unwrap(), - }; + let sleds_by_id: BTreeMap = + [(sled_id1, &s1), (sled_id2, &s2)] + .into_iter() + .map(|(sled_id, server)| { + let SocketAddr::V6(addr) = server.addr() else { + panic!("Expected Ipv6 address. Got {}", server.addr()); + }; + let sled = Sled { + id: sled_id, + sled_agent_address: addr, + is_scrimlet: false, + }; + (sled_id, sled) + }) + .collect(); - let generation = Generation::new(); + // Get a success result back when the blueprint has an empty set of + // zones. 
+ let blueprint = Arc::new(create_blueprint(BTreeMap::new())); + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) + .await + .expect("failed to deploy no zones"); // Zones are updated in a particular order, but each request contains // the full set of zones that must be running. // See `rack_setup::service::ServiceInner::run` for more details. - let mut zones = OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalDns { - dataset, - dns_address: "oh-hello-internal-dns".into(), - gz_address: "::1".parse().unwrap(), - gz_address_index: 0, - http_address: "some-ipv6-address".into(), - }, - }], - }; + fn make_zones() -> OmicronZonesConfig { + OmicronZonesConfig { + generation: Generation::new(), + zones: vec![OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalDns { + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .unwrap(), + }, + dns_address: "oh-hello-internal-dns".into(), + gz_address: "::1".parse().unwrap(), + gz_address_index: 0, + http_address: "some-ipv6-address".into(), + }, + }], + } + } // Create a blueprint with only the `InternalDns` zone for both servers // We reuse the same `OmicronZonesConfig` because the details don't // matter for this test. + let mut zones1 = make_zones(); + let mut zones2 = make_zones(); let blueprint = Arc::new(create_blueprint(BTreeMap::from([ - (sled_id1, zones.clone()), - (sled_id2, zones.clone()), + (sled_id1, zones1.clone()), + (sled_id2, zones2.clone()), ]))); // Set expectations for the initial requests sent to the fake @@ -250,7 +220,7 @@ mod test { } // Execute it. 
- deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect("failed to deploy initial zones"); @@ -267,7 +237,7 @@ mod test { .respond_with(status_code(204)), ); } - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect("failed to deploy same zones"); s1.verify_and_clear(); @@ -291,7 +261,7 @@ mod test { ); let errors = - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect_err("unexpectedly succeeded in deploying zones"); @@ -304,21 +274,25 @@ mod test { s2.verify_and_clear(); // Add an `InternalNtp` zone for our next update - zones.generation = generation.next(); - zones.zones.push(OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalNtp { - address: "::1".into(), - dns_servers: vec!["::1".parse().unwrap()], - domain: None, - ntp_servers: vec!["some-ntp-server-addr".into()], - }, - }); + fn append_zone(zones: &mut OmicronZonesConfig) { + zones.generation = zones.generation.next(); + zones.zones.push(OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalNtp { + address: "::1".into(), + dns_servers: vec!["::1".parse().unwrap()], + domain: None, + ntp_servers: vec!["some-ntp-server-addr".into()], + }, + }); + } + append_zone(&mut zones1); + append_zone(&mut zones2); let blueprint = Arc::new(create_blueprint(BTreeMap::from([ - (sled_id1, zones.clone()), - (sled_id2, zones.clone()), + (sled_id1, zones1), + (sled_id2, zones2), ]))); // Set our new expectations @@ -337,7 +311,7 @@ mod test { } // Activate the task - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect("failed to deploy last round of zones"); s1.verify_and_clear(); diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index 34fe08d78c..a1f285fbef 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -23,6 +23,7 @@ use uuid::Uuid; pub struct Blueprint { pub id: Uuid, pub parent_blueprint_id: Option, + pub internal_dns_version: Generation, pub time_created: DateTime, pub creator: String, pub comment: String, @@ -33,6 +34,7 @@ impl From<&'_ nexus_types::deployment::Blueprint> for Blueprint { Self { id: bp.id, parent_blueprint_id: bp.parent_blueprint_id, + internal_dns_version: Generation(bp.internal_dns_version), time_created: bp.time_created, creator: bp.creator.clone(), comment: bp.comment.clone(), @@ -45,6 +47,7 @@ impl From for nexus_types::deployment::BlueprintMetadata { Self { id: value.id, parent_blueprint_id: value.parent_blueprint_id, + internal_dns_version: *value.internal_dns_version, time_created: value.time_created, creator: value.creator, comment: value.comment, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 3d9050b1af..54755486e5 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(35, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(36, 0, 0); table! { disk (id) { @@ -1421,6 +1421,8 @@ table! { time_created -> Timestamptz, creator -> Text, comment -> Text, + + internal_dns_version -> Int8, } } diff --git a/nexus/db-queries/src/context.rs b/nexus/db-queries/src/context.rs index 835256fc58..72da01a5f1 100644 --- a/nexus/db-queries/src/context.rs +++ b/nexus/db-queries/src/context.rs @@ -257,6 +257,65 @@ impl OpContext { ); result } + + /// Returns an error if we're currently in a context where expensive or + /// complex operations should not be allowed + /// + /// This is intended for checking that we're not trying to perform expensive + /// or complex (multi-step) operations from HTTP request handlers. + /// Generally, expensive or complex operations should be broken up into + /// multiple requests (e.g., pagination). That's for a variety reasons: + /// + /// - DoS mitigation: requests that kick off an arbitrarily-large amount of + /// work can tie up server resources without requiring commensurate + /// resources from the client, which makes it very easy to attack the + /// system + /// + /// - monitoring: it's easier to reason about metrics for operations that + /// are roughly bounded in size (otherwise, things like "requests per + /// second" can become meaningless) + /// + /// - stability: very large database queries have outsize effects on the + /// rest of the system (potentially blocking other clients for extended + /// periods of time, or preventing the database from timely cleanup of + /// invalidated data, etc.) + /// + /// - throttling: separate requests gives us an opportunity to dynamically + /// throttle clients that are hitting the system hard + /// + /// - failure transparency: when one request kicks off a complex + /// (multi-step) operation, it's harder to communicate programmatically + /// why the request failed + /// + /// - retries: when failures happen during smaller operations, clients can + /// retry only the part that failed. When failures happen during complex + /// (multi-step) operations, the client has to retry the whole thing. + /// This is much worse than it sounds: it means that for the request to + /// succeed, _all_ of the suboperations have to succeed. During a period + /// of transient failures, that could be extremely unlikely. With smaller + /// requests, clients can just retry each one until it succeeds without + /// having to retry the requests that already succeeded (whose failures + /// would trigger another attempt at the whole thing). + /// + /// - Simple request-response HTTP is not well-suited to long-running + /// operations. There's no way to communicate progress or even that the + /// request is still going. There's no good way to cancel such a request, + /// either. Clients and proxies often don't expect long requests and + /// apply aggressive timeouts. Depending on the HTTP version, a + /// long-running request can tie up the TCP connection. 
+ pub fn check_complex_operations_allowed(&self) -> Result<(), Error> { + let api_handler = match self.kind { + OpKind::ExternalApiRequest | OpKind::InternalApiRequest => true, + OpKind::Saga | OpKind::Background | OpKind::Test => false, + }; + if api_handler { + Err(Error::internal_error( + "operation not allowed from API handlers", + )) + } else { + Ok(()) + } + } } impl Session for ConsoleSessionWithSiloId { diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index cde6e7e8c6..d9df143022 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -233,7 +233,13 @@ impl DataStore { // Read the metadata from the primary blueprint row, and ensure that it // exists. - let (parent_blueprint_id, time_created, creator, comment) = { + let ( + parent_blueprint_id, + internal_dns_version, + time_created, + creator, + comment, + ) = { use db::schema::blueprint::dsl; let Some(blueprint) = dsl::blueprint @@ -251,6 +257,7 @@ impl DataStore { ( blueprint.parent_blueprint_id, + *blueprint.internal_dns_version, blueprint.time_created, blueprint.creator, blueprint.comment, @@ -479,6 +486,7 @@ impl DataStore { omicron_zones, zones_in_service, parent_blueprint_id, + internal_dns_version, time_created, creator, comment, @@ -1056,6 +1064,7 @@ mod tests { use nexus_types::external_api::views::SledProvisionState; use nexus_types::inventory::Collection; use omicron_common::address::Ipv6Subnet; + use omicron_common::api::external::Generation; use omicron_test_utils::dev; use rand::thread_rng; use rand::Rng; @@ -1174,6 +1183,7 @@ mod tests { let policy = policy_from_collection(&collection); let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "test", ) @@ -1207,6 +1217,7 @@ mod tests { nexus_inventory::CollectionBuilder::new("test").build(); let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &EMPTY_POLICY, "test", ) @@ -1332,10 +1343,16 @@ mod tests { policy.sleds.insert(new_sled_id, fake_sled_resources(None)); let new_sled_zpools = &policy.sleds.get(&new_sled_id).unwrap().zpools; - // Create a builder for a child blueprint. - let mut builder = - BlueprintBuilder::new_based_on(&blueprint1, &policy, "test") - .expect("failed to create builder"); + // Create a builder for a child blueprint. While we're at it, use a + // different DNS version to test that that works. + let new_dns_version = blueprint1.internal_dns_version.next(); + let mut builder = BlueprintBuilder::new_based_on( + &blueprint1, + new_dns_version, + &policy, + "test", + ) + .expect("failed to create builder"); // Add zones to our new sled. 
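[Editor's note] The `OpContext::check_complex_operations_allowed()` guard added above is easiest to see in isolation. Below is a self-contained sketch of the same idea with stand-in types (the real `OpContext`/`OpKind` in nexus/db-queries carry much more state): background tasks and sagas may perform multi-batch work, while API request handlers are told to paginate instead.

```rust
// Schematic stand-ins for the real OpContext/OpKind; only the guard logic is
// illustrated here.
#[derive(Debug, Clone, Copy)]
enum OpKind {
    ExternalApiRequest,
    InternalApiRequest,
    Saga,
    Background,
    Test,
}

struct OpContext {
    kind: OpKind,
}

impl OpContext {
    /// Fail fast when an expensive, multi-step operation is attempted from an
    /// HTTP request handler; callers like `sled_list_all_batched()` (later in
    /// this change) invoke this before scanning whole tables.
    fn check_complex_operations_allowed(&self) -> Result<(), String> {
        match self.kind {
            OpKind::ExternalApiRequest | OpKind::InternalApiRequest => {
                Err("operation not allowed from API handlers".to_string())
            }
            OpKind::Saga | OpKind::Background | OpKind::Test => Ok(()),
        }
    }
}

fn main() {
    // Background tasks (e.g., blueprint execution) are allowed to do the
    // expensive thing...
    assert!(OpContext { kind: OpKind::Background }
        .check_complex_operations_allowed()
        .is_ok());
    // ...while request handlers are expected to paginate instead.
    assert!(OpContext { kind: OpKind::ExternalApiRequest }
        .check_complex_operations_allowed()
        .is_err());
}
```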
assert_eq!( @@ -1381,8 +1398,9 @@ mod tests { .blueprint_read(&opctx, &authz_blueprint2) .await .expect("failed to read collection back"); - println!("diff: {}", blueprint2.diff(&blueprint_read)); + println!("diff: {}", blueprint2.diff_sleds(&blueprint_read)); assert_eq!(blueprint2, blueprint_read); + assert_eq!(blueprint2.internal_dns_version, new_dns_version); { let mut expected_ids = [blueprint1.id, blueprint2.id]; expected_ids.sort(); @@ -1474,18 +1492,27 @@ mod tests { nexus_inventory::CollectionBuilder::new("test").build(); let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &EMPTY_POLICY, "test1", ) .unwrap(); - let blueprint2 = - BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test2") - .expect("failed to create builder") - .build(); - let blueprint3 = - BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test3") - .expect("failed to create builder") - .build(); + let blueprint2 = BlueprintBuilder::new_based_on( + &blueprint1, + Generation::new(), + &EMPTY_POLICY, + "test2", + ) + .expect("failed to create builder") + .build(); + let blueprint3 = BlueprintBuilder::new_based_on( + &blueprint1, + Generation::new(), + &EMPTY_POLICY, + "test3", + ) + .expect("failed to create builder") + .build(); assert_eq!(blueprint1.parent_blueprint_id, None); assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); assert_eq!(blueprint3.parent_blueprint_id, Some(blueprint1.id)); @@ -1574,10 +1601,14 @@ mod tests { // Create a child of blueprint3, and ensure when we set it as the target // with enabled=false, that status is serialized. - let blueprint4 = - BlueprintBuilder::new_based_on(&blueprint3, &EMPTY_POLICY, "test3") - .expect("failed to create builder") - .build(); + let blueprint4 = BlueprintBuilder::new_based_on( + &blueprint3, + Generation::new(), + &EMPTY_POLICY, + "test3", + ) + .expect("failed to create builder") + .build(); assert_eq!(blueprint4.parent_blueprint_id, Some(blueprint3.id)); datastore.blueprint_insert(&opctx, &blueprint4).await.unwrap(); let bp4_target = BlueprintTarget { diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index 552ad31487..180764d38c 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -346,8 +346,77 @@ impl DataStore { Ok(()) } + /// Update the configuration of a DNS zone as specified in `update`, + /// conditional on the _current_ DNS version being `old_version`. + /// + /// Unlike `dns_update_incremental()`, this function assumes the caller has + /// already constructed `update` based on a specific DNS version + /// (`old_version`) and only wants to apply these changes if the DNS version + /// in the database has not changed. + /// + /// Also unlike `dns_update_incremental()`, this function creates its own + /// transaction to apply the update. 
+ /// + /// Like `dns_update_incremental()`, **callers almost certainly want to wake + /// up the corresponding Nexus background task to cause these changes to be + /// propagated to the corresponding DNS servers.** + pub async fn dns_update_from_version( + &self, + opctx: &OpContext, + update: DnsVersionUpdateBuilder, + old_version: Generation, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::DNS_CONFIG).await?; + let conn = self.pool_connection_authorized(opctx).await?; + conn.transaction_async(|c| async move { + let zones = self + .dns_zones_list_all_on_connection(opctx, &c, update.dns_group) + .await?; + // This looks like a time-of-check-to-time-of-use race, but this + // approach works because we're inside a transaction and the + // isolation level is SERIALIZABLE. + let version = self + .dns_group_latest_version_conn(opctx, &c, update.dns_group) + .await?; + if version.version != old_version { + return Err(TransactionError::CustomError(Error::conflict( + format!( + "expected current DNS version to be {}, found {}", + *old_version, *version.version, + ), + ))); + } + + self.dns_write_version_internal( + &c, + update, + zones, + Generation(old_version.next()), + ) + .await + }) + .await + .map_err(|e| match e { + TransactionError::CustomError(e) => e, + TransactionError::Database(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) + } + /// Update the configuration of a DNS zone as specified in `update` /// + /// Unlike `dns_update_from_version()`, this function assumes that the + /// caller's changes are valid regardless of the current DNS configuration. + /// This function fetches the latest version and always writes the updated + /// config as the next version. This is appropriate if the caller is adding + /// something wholly new or removing something that it knows should be + /// present (as in the case when we add or remove DNS names for a Silo, + /// since we control exactly when that happens). This is _not_ appropriate + /// if the caller is making arbitrary changes that might conflict with a + /// concurrent caller. For that, you probably want + /// `dns_update_from_version()`. + /// /// This function runs the body inside a transaction (if no transaction is /// open) or a nested transaction (savepoint, if a transaction is already /// open). Generally, the caller should invoke this function while already @@ -358,14 +427,14 @@ impl DataStore { /// /// It's recommended to put this step last in any transaction because the /// more time elapses between running this function and attempting to commit - /// the transaction, the greater the change of either transaction failure + /// the transaction, the greater the chance of either transaction failure /// due to a conflict error (if some other caller attempts to update the /// same DNS group) or another client blocking (for the same reason). 
/// /// **Callers almost certainly want to wake up the corresponding Nexus /// background task to cause these changes to be propagated to the /// corresponding DNS servers.** - pub async fn dns_update( + pub async fn dns_update_incremental( &self, opctx: &OpContext, conn: &async_bb8_diesel::Connection, @@ -378,19 +447,32 @@ impl DataStore { .await?; conn.transaction_async(|c| async move { - self.dns_update_internal(opctx, &c, update, zones).await + let version = self + .dns_group_latest_version_conn(opctx, conn, update.dns_group) + .await?; + self.dns_write_version_internal( + &c, + update, + zones, + Generation(version.version.next()), + ) + .await }) .await } // This must only be used inside a transaction. Otherwise, it may make - // invalid changes to the database state. Use `dns_update()` instead. - async fn dns_update_internal( + // invalid changes to the database state. Use one of the `dns_update_*()` + // functions instead. + // + // The caller should already have checked (in the same transaction) that + // their version number is the correct next version. + async fn dns_write_version_internal( &self, - opctx: &OpContext, conn: &async_bb8_diesel::Connection, update: DnsVersionUpdateBuilder, zones: Vec, + new_version_num: Generation, ) -> Result<(), TransactionError> { // TODO-scalability TODO-performance This would be much better as a CTE // for all the usual reasons described in RFD 192. Using an interactive @@ -401,11 +483,6 @@ impl DataStore { // operations fail spuriously as far as the client is concerned). We // expect these problems to be small or unlikely at small scale but // significant as the system scales up. - let dns_group = update.dns_group; - let version = - self.dns_group_latest_version_conn(opctx, conn, dns_group).await?; - let new_version_num = - nexus_db_model::Generation(version.version.next()); let new_version = DnsVersion { dns_group: update.dns_group, version: new_version_num, @@ -498,15 +575,16 @@ impl DataStore { /// and add the name back with different records. /// /// You use this object to build up a _description_ of the changes to the DNS -/// zone's configuration. Then you call [`DataStore::dns_update()`] to apply -/// these changes transactionally to the database. The changes are then -/// propagated asynchronously to the DNS servers. No changes are made (to -/// either the database or the DNS servers) while you modify this object. +/// zone's configuration. Then you call [`DataStore::dns_update_incremental()`] +/// or [`DataStore::dns_update_from_version()`] to apply these changes +/// transactionally to the database. The changes are then propagated +/// asynchronously to the DNS servers. No changes are made (to either the +/// database or the DNS servers) while you modify this object. /// /// This object changes all of the zones associated with a particular DNS group /// because the assumption right now is that they're equivalent. (In practice, /// we should only ever have one zone in each group right now.) 
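[Editor's note] To make the conditional-update contract described above concrete, here is a small, self-contained sketch of the compare-and-set behavior that `dns_update_from_version()` provides. The types are deliberately simplified (a `u64` generation and an in-memory map rather than `Generation`, `DnsVersionUpdateBuilder`, and CockroachDB), so treat it as an illustration of the semantics, not of the actual datastore code.

```rust
// Sketch of the optimistic-concurrency idea: a caller computes an update
// against the DNS version it read, and the write only lands if that version
// is still current.
use std::collections::HashMap;

struct DnsStore {
    version: u64,
    records: HashMap<String, Vec<String>>,
}

impl DnsStore {
    fn update_from_version(
        &mut self,
        expected: u64,
        apply: impl FnOnce(&mut HashMap<String, Vec<String>>),
    ) -> Result<(), String> {
        if self.version != expected {
            return Err(format!(
                "expected current DNS version to be {}, found {}",
                expected, self.version
            ));
        }
        apply(&mut self.records);
        self.version += 1;
        Ok(())
    }
}

fn main() {
    let mut store = DnsStore { version: 1, records: HashMap::new() };

    // First writer read version 1 and succeeds.
    store
        .update_from_version(1, |r| {
            r.insert("nexus".to_string(), vec!["::1".to_string()]);
        })
        .unwrap();

    // A second writer that also planned against version 1 is rejected and
    // must re-plan against the new version (now 2).
    assert!(store
        .update_from_version(1, |r| {
            r.remove("nexus");
        })
        .is_err());
}
```

In this change, `dns_update_incremental()` keeps the always-append behavior for callers (such as Silo create/delete) that own their names outright, while the conditional variant exists for callers that computed their changes from a specific DNS version and must not clobber newer data.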
-#[derive(Clone)] +#[derive(Clone, Debug)] pub struct DnsVersionUpdateBuilder { dns_group: DnsGroup, comment: String, @@ -596,6 +674,16 @@ impl DnsVersionUpdateBuilder { Ok(()) } } + + pub fn names_removed(&self) -> impl Iterator { + self.names_removed.iter().map(AsRef::as_ref) + } + + pub fn names_added(&self) -> impl Iterator { + self.names_added + .iter() + .map(|(name, list)| (name.as_ref(), list.as_ref())) + } } #[cfg(test)] @@ -1360,8 +1448,8 @@ mod test { } #[tokio::test] - async fn test_dns_update() { - let logctx = dev::test_setup_log("test_dns_update"); + async fn test_dns_update_incremental() { + let logctx = dev::test_setup_log("test_dns_update_incremental"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; let now = Utc::now(); @@ -1457,7 +1545,10 @@ mod test { update.add_name(String::from("n2"), records2.clone()).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } // Verify the new config. @@ -1490,7 +1581,10 @@ mod test { update.add_name(String::from("n1"), records12.clone()).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } let dns_config = datastore @@ -1520,7 +1614,10 @@ mod test { update.remove_name(String::from("n1")).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } let dns_config = datastore @@ -1547,7 +1644,10 @@ mod test { update.add_name(String::from("n1"), records2.clone()).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } let dns_config = datastore @@ -1586,7 +1686,9 @@ mod test { let copctx = opctx.child(std::collections::BTreeMap::new()); let mut fut = conn1 .transaction_async(|c1| async move { - cds.dns_update(&copctx, &c1, update1).await.unwrap(); + cds.dns_update_incremental(&copctx, &c1, update1) + .await + .unwrap(); // Let the outside scope know we've done the update, but we // haven't committed the transaction yet. Wait for them to // tell us to proceed. @@ -1613,7 +1715,10 @@ mod test { String::from("the test suite"), ); update2.add_name(String::from("n1"), records1.clone()).unwrap(); - datastore.dns_update(&opctx, &conn2, update2).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn2, update2) + .await + .unwrap(); // Now let the first one finish. 
wait2_tx.send(()).unwrap(); @@ -1657,8 +1762,10 @@ mod test { update.remove_name(String::from("n4")).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - let error = - datastore.dns_update(&opctx, &conn, update).await.unwrap_err(); + let error = datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap_err(); let error = match error { TransactionError::CustomError(err) => err, _ => panic!("Unexpected error: {:?}", error), @@ -1687,7 +1794,10 @@ mod test { let conn = datastore.pool_connection_for_tests().await.unwrap(); let error = Error::from( - datastore.dns_update(&opctx, &conn, update).await.unwrap_err(), + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap_err(), ); let msg = error.to_string(); assert!(msg.starts_with("Internal Error: "), "Message: {msg:}"); @@ -1714,4 +1824,117 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_dns_update_from_version() { + let logctx = dev::test_setup_log("test_dns_update_from_version"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // The guts of `dns_update_from_version()` are shared with + // `dns_update_incremental()`. The main cases worth testing here are + // (1) quick check that the happy path works, plus (2) make sure it + // fails when the precondition fails (current version doesn't match + // what's expected). + // + // Start by loading some initial data. + let conn = datastore.pool_connection_for_tests().await.unwrap(); + let initial_data = InitialDnsGroup::new( + DnsGroup::Internal, + "my-zone", + "test-suite", + "test-suite", + HashMap::from([ + ( + "wendell".to_string(), + vec![DnsRecord::Aaaa(Ipv6Addr::LOCALHOST)], + ), + ( + "krabappel".to_string(), + vec![DnsRecord::Aaaa(Ipv6Addr::LOCALHOST)], + ), + ]), + ); + DataStore::load_dns_data(&conn, initial_data) + .await + .expect("failed to insert initial data"); + + // Construct an update and apply it conditional on the current + // generation matching the initial one. This should succeed. + let mut update1 = DnsVersionUpdateBuilder::new( + DnsGroup::Internal, + String::from("test-suite-1"), + String::from("test-suite-1"), + ); + update1.remove_name(String::from("wendell")).unwrap(); + update1 + .add_name( + String::from("nelson"), + vec![DnsRecord::Aaaa(Ipv6Addr::LOCALHOST)], + ) + .unwrap(); + let gen1 = Generation::new(); + datastore + .dns_update_from_version(&opctx, update1, gen1) + .await + .expect("failed to update from first generation"); + + // Now construct another update based on the _first_ version and try to + // apply that. It should not work because the version has changed from + // under us. + let mut update2 = DnsVersionUpdateBuilder::new( + DnsGroup::Internal, + String::from("test-suite-2"), + String::from("test-suite-2"), + ); + update2.remove_name(String::from("krabappel")).unwrap(); + update2 + .add_name( + String::from("hoover"), + vec![DnsRecord::Aaaa(Ipv6Addr::LOCALHOST)], + ) + .unwrap(); + let error = datastore + .dns_update_from_version(&opctx, update2.clone(), gen1) + .await + .expect_err("update unexpectedly succeeded"); + assert!(error + .to_string() + .contains("expected current DNS version to be 1, found 2")); + + // At this point, the database state should reflect the first update but + // not the second. 
+ let config = datastore + .dns_config_read(&opctx, DnsGroup::Internal) + .await + .expect("failed to read config"); + let gen2 = nexus_db_model::Generation(gen1.next()); + assert_eq!(u64::from(*gen2), config.generation); + assert_eq!(1, config.zones.len()); + let records = &config.zones[0].records; + assert!(records.contains_key("nelson")); + assert!(!records.contains_key("wendell")); + assert!(records.contains_key("krabappel")); + + // We can apply the second update, as long as we say it's conditional on + // the current generation. + datastore + .dns_update_from_version(&opctx, update2, gen2) + .await + .expect("failed to update from first generation"); + let config = datastore + .dns_config_read(&opctx, DnsGroup::Internal) + .await + .expect("failed to read config"); + assert_eq!(u64::from(gen2.next()), config.generation); + assert_eq!(1, config.zones.len()); + let records = &config.zones[0].records; + assert!(records.contains_key("nelson")); + assert!(!records.contains_key("wendell")); + assert!(!records.contains_key("krabappel")); + assert!(records.contains_key("hoover")); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index f9e0be81c1..5f05aa1760 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -105,6 +105,7 @@ pub use instance::InstanceAndActiveVmm; pub use inventory::DataStoreInventoryTest; pub use rack::RackInit; pub use silo::Discoverability; +use std::num::NonZeroU32; pub use switch_port::SwitchPortSettingsCombinedResult; pub use virtual_provisioning_collection::StorageType; pub use volume::read_only_resources_associated_with_volume; @@ -121,6 +122,14 @@ pub const SERVICE_IP_POOL_NAME: &str = "oxide-service-pool"; /// The name of the built-in Project and VPC for Oxide services. pub const SERVICES_DB_NAME: &str = "oxide-services"; +/// "limit" to be used in SQL queries that paginate through large result sets +/// +/// This value is chosen to be small enough to avoid any queries being too +/// expensive. +// unsafe: `new_unchecked` is only unsound if the argument is 0. +pub const SQL_BATCH_SIZE: NonZeroU32 = + unsafe { NonZeroU32::new_unchecked(1000) }; + // Represents a query that is ready to be executed. // // This helper trait lets the statement either be executed or explained. diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 6cfda3c093..df10b1c072 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -282,7 +282,8 @@ impl DataStore { .await?; } - self.dns_update(nexus_opctx, &conn, dns_update).await?; + self.dns_update_incremental(nexus_opctx, &conn, dns_update) + .await?; self.silo_quotas_create( &conn, @@ -420,7 +421,7 @@ impl DataStore { ) .await?; - self.dns_update(dns_opctx, &conn, dns_update).await?; + self.dns_update_incremental(dns_opctx, &conn, dns_update).await?; info!(opctx.log, "deleted silo {}", id); diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 7b94d64418..eb50061272 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`Sled`]s. 
use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; @@ -14,11 +15,13 @@ use crate::db::model::Sled; use crate::db::model::SledResource; use crate::db::model::SledUpdate; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::update_and_check::UpdateAndCheck; use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_types::identity::Asset; use omicron_common::api::external; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -77,6 +80,29 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// List all sleds, making as many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. + pub async fn sled_list_all_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.check_complex_operations_allowed()?; + + let mut all_sleds = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self.sled_list(opctx, &p.current_pagparams()).await?; + paginator = + p.found_batch(&batch, &|s: &nexus_db_model::Sled| s.id()); + all_sleds.extend(batch); + } + Ok(all_sleds) + } + pub async fn sled_reservation_create( &self, opctx: &OpContext, @@ -465,4 +491,54 @@ mod test { limit, } } + + /// Tests listing large numbers of sleds via the batched interface + #[tokio::test] + async fn sled_list_batch() { + let logctx = + dev::test_setup_log("sled_reservation_create_non_provisionable"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let size = usize::try_from(2 * SQL_BATCH_SIZE.get()).unwrap(); + let mut new_sleds = Vec::with_capacity(size); + new_sleds.resize_with(size, test_new_sled_update); + let mut expected_ids: Vec<_> = + new_sleds.iter().map(|s| s.id()).collect(); + expected_ids.sort(); + + // This is essentially the same as `sled_upsert()`. But since we know + // none of these exist already, we can just insert them. And that means + // we can do them all in one SQL statement. This is considerably + // faster. + let values_to_insert: Vec<_> = + new_sleds.into_iter().map(|s| s.into_insertable()).collect(); + let ninserted = { + use db::schema::sled::dsl; + diesel::insert_into(dsl::sled) + .values(values_to_insert) + .execute_async( + &*datastore + .pool_connection_for_tests() + .await + .expect("failed to get connection"), + ) + .await + .expect("failed to insert sled") + }; + assert_eq!(ninserted, size); + + let sleds = datastore + .sled_list_all_batched(&opctx) + .await + .expect("failed to list all sleds"); + // We don't need to sort these ids because the sleds are enumerated in + // id order. 
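[Editor's note] The `sled_list_all_batched()` method above combines the new `SQL_BATCH_SIZE` constant with the existing `Paginator` helper. The loop shape is easy to get subtly wrong (advancing the marker, terminating on a short page), so here is a self-contained sketch of the same pattern over an in-memory, id-sorted "table"; the real code pages through SQL via `paginated()` and `found_batch()` instead.

```rust
// Sketch of the batched-listing loop: fetch pages of at most SQL_BATCH_SIZE
// rows, keyed by the last id seen, until a short page signals the end.
const SQL_BATCH_SIZE: usize = 1000;

fn list_page(rows: &[u32], after: Option<u32>, limit: usize) -> Vec<u32> {
    // Stand-in for one paginated SQL query over an id-ordered table.
    rows.iter()
        .copied()
        .filter(|id| after.map_or(true, |a| *id > a))
        .take(limit)
        .collect()
}

fn list_all_batched(rows: &[u32]) -> Vec<u32> {
    let mut all = Vec::new();
    let mut marker: Option<u32> = None;
    loop {
        let batch = list_page(rows, marker, SQL_BATCH_SIZE);
        // A short (or empty) page means we have seen everything.
        let done = batch.len() < SQL_BATCH_SIZE;
        marker = batch.last().copied();
        all.extend(batch);
        if done {
            break;
        }
    }
    all
}

fn main() {
    // A couple of full batches plus a partial one.
    let rows: Vec<u32> = (0..2500).collect();
    assert_eq!(list_all_batched(&rows), rows);
}
```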
+ let found_ids: Vec<_> = sleds.into_iter().map(|s| s.id()).collect(); + assert_eq!(expected_ids, found_ids); + assert_eq!(found_ids.len(), size); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 1bf46d34b2..86a9b8da6e 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -100,6 +100,7 @@ pub enum EnsureMultiple { pub struct BlueprintBuilder<'a> { /// previous blueprint, on which this one will be based parent_blueprint: &'a Blueprint, + internal_dns_version: Generation, // These fields are used to allocate resources from sleds. policy: &'a Policy, @@ -128,6 +129,7 @@ impl<'a> BlueprintBuilder<'a> { /// collection (representing no changes from the collection state) pub fn build_initial_from_collection( collection: &'a Collection, + internal_dns_version: Generation, policy: &'a Policy, creator: &str, ) -> Result { @@ -174,6 +176,7 @@ impl<'a> BlueprintBuilder<'a> { omicron_zones, zones_in_service, parent_blueprint_id: None, + internal_dns_version, time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("from collection {}", collection.id), @@ -184,6 +187,7 @@ impl<'a> BlueprintBuilder<'a> { /// starting with no changes from that state pub fn new_based_on( parent_blueprint: &'a Blueprint, + internal_dns_version: Generation, policy: &'a Policy, creator: &str, ) -> anyhow::Result> { @@ -284,6 +288,7 @@ impl<'a> BlueprintBuilder<'a> { Ok(BlueprintBuilder { parent_blueprint, + internal_dns_version, policy, sled_ip_allocators: BTreeMap::new(), zones: BlueprintZones::new(parent_blueprint), @@ -307,6 +312,7 @@ impl<'a> BlueprintBuilder<'a> { omicron_zones, zones_in_service: self.zones_in_service, parent_blueprint_id: Some(self.parent_blueprint.id), + internal_dns_version: self.internal_dns_version, time_created: now_db_precision(), creator: self.creator, comment: self.comments.join(", "), @@ -929,6 +935,7 @@ pub mod test { let blueprint_initial = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "the_test", ) @@ -939,7 +946,7 @@ pub mod test { // provide that ourselves. For our purposes though we don't care. let zones_in_service = blueprint_initial.zones_in_service.clone(); let diff = blueprint_initial - .diff_from_collection(&collection, &zones_in_service); + .diff_sleds_from_collection(&collection, &zones_in_service); println!( "collection -> initial blueprint (expected no changes):\n{}", diff @@ -951,13 +958,14 @@ pub mod test { // Test a no-op blueprint. 
let builder = BlueprintBuilder::new_based_on( &blueprint_initial, + Generation::new(), &policy, "test_basic", ) .expect("failed to create builder"); let blueprint = builder.build(); verify_blueprint(&blueprint); - let diff = blueprint_initial.diff(&blueprint); + let diff = blueprint_initial.diff_sleds(&blueprint); println!( "initial blueprint -> next blueprint (expected no changes):\n{}", diff @@ -972,15 +980,20 @@ pub mod test { let (collection, mut policy) = example(); let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "the_test", ) .expect("failed to create initial blueprint"); verify_blueprint(&blueprint1); - let mut builder = - BlueprintBuilder::new_based_on(&blueprint1, &policy, "test_basic") - .expect("failed to create builder"); + let mut builder = BlueprintBuilder::new_based_on( + &blueprint1, + Generation::new(), + &policy, + "test_basic", + ) + .expect("failed to create builder"); // The initial blueprint should have internal NTP zones on all the // existing sleds, plus Crucible zones on all pools. So if we ensure @@ -996,7 +1009,7 @@ pub mod test { let blueprint2 = builder.build(); verify_blueprint(&blueprint2); - let diff = blueprint1.diff(&blueprint2); + let diff = blueprint1.diff_sleds(&blueprint2); println!( "initial blueprint -> next blueprint (expected no changes):\n{}", diff @@ -1008,9 +1021,13 @@ pub mod test { // The next step is adding these zones to a new sled. let new_sled_id = Uuid::new_v4(); let _ = policy_add_sled(&mut policy, new_sled_id); - let mut builder = - BlueprintBuilder::new_based_on(&blueprint2, &policy, "test_basic") - .expect("failed to create builder"); + let mut builder = BlueprintBuilder::new_based_on( + &blueprint2, + Generation::new(), + &policy, + "test_basic", + ) + .expect("failed to create builder"); builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); let new_sled_resources = policy.sleds.get(&new_sled_id).unwrap(); for pool_name in &new_sled_resources.zpools { @@ -1021,7 +1038,7 @@ pub mod test { let blueprint3 = builder.build(); verify_blueprint(&blueprint3); - let diff = blueprint2.diff(&blueprint3); + let diff = blueprint2.diff_sleds(&blueprint3); println!("expecting new NTP and Crucible zones:\n{}", diff); // No sleds were changed or removed. @@ -1081,6 +1098,9 @@ pub mod test { fn test_add_nexus_with_no_existing_nexus_zones() { let (mut collection, policy) = example(); + // We don't care about the internal DNS version here. + let internal_dns_version = Generation::new(); + // Adding a new Nexus zone currently requires copying settings from an // existing Nexus zone. If we remove all Nexus zones from the // collection, create a blueprint, then try to add a Nexus zone, it @@ -1093,14 +1113,19 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection( &collection, + internal_dns_version, &policy, "test", ) .expect("failed to create initial blueprint"); - let mut builder = - BlueprintBuilder::new_based_on(&parent, &policy, "test") - .expect("failed to create builder"); + let mut builder = BlueprintBuilder::new_based_on( + &parent, + internal_dns_version, + &policy, + "test", + ) + .expect("failed to create builder"); let err = builder .sled_ensure_zone_multiple_nexus( @@ -1124,6 +1149,9 @@ pub mod test { fn test_add_nexus_error_cases() { let (mut collection, policy) = example(); + // We don't care about the internal DNS version here. 
+ let internal_dns_version = Generation::new(); + // Remove the Nexus zone from one of the sleds so that // `sled_ensure_zone_nexus` can attempt to add a Nexus zone to // `sled_id`. @@ -1144,6 +1172,7 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "test", ) @@ -1152,9 +1181,13 @@ pub mod test { { // Attempting to add Nexus to the sled we removed it from (with no // other changes to the environment) should succeed. - let mut builder = - BlueprintBuilder::new_based_on(&parent, &policy, "test") - .expect("failed to create builder"); + let mut builder = BlueprintBuilder::new_based_on( + &parent, + internal_dns_version, + &policy, + "test", + ) + .expect("failed to create builder"); let added = builder .sled_ensure_zone_multiple_nexus(sled_id, 1) .expect("failed to ensure nexus zone"); @@ -1166,9 +1199,13 @@ pub mod test { // Attempting to add multiple Nexus zones to the sled we removed it // from (with no other changes to the environment) should also // succeed. - let mut builder = - BlueprintBuilder::new_based_on(&parent, &policy, "test") - .expect("failed to create builder"); + let mut builder = BlueprintBuilder::new_based_on( + &parent, + internal_dns_version, + &policy, + "test", + ) + .expect("failed to create builder"); let added = builder .sled_ensure_zone_multiple_nexus(sled_id, 3) .expect("failed to ensure nexus zone"); @@ -1194,9 +1231,13 @@ pub mod test { assert!(!used_ip_ranges.is_empty()); policy.service_ip_pool_ranges = used_ip_ranges; - let mut builder = - BlueprintBuilder::new_based_on(&parent, &policy, "test") - .expect("failed to create builder"); + let mut builder = BlueprintBuilder::new_based_on( + &parent, + internal_dns_version, + &policy, + "test", + ) + .expect("failed to create builder"); let err = builder .sled_ensure_zone_multiple_nexus(sled_id, 1) .unwrap_err(); @@ -1247,12 +1288,18 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "test", ) .unwrap(); - match BlueprintBuilder::new_based_on(&parent, &policy, "test") { + match BlueprintBuilder::new_based_on( + &parent, + Generation::new(), + &policy, + "test", + ) { Ok(_) => panic!("unexpected success"), Err(err) => assert!( err.to_string().contains("duplicate external IP"), @@ -1290,12 +1337,18 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "test", ) .unwrap(); - match BlueprintBuilder::new_based_on(&parent, &policy, "test") { + match BlueprintBuilder::new_based_on( + &parent, + Generation::new(), + &policy, + "test", + ) { Ok(_) => panic!("unexpected success"), Err(err) => assert!( err.to_string().contains("duplicate Nexus NIC IP"), @@ -1333,12 +1386,18 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "test", ) .unwrap(); - match BlueprintBuilder::new_based_on(&parent, &policy, "test") { + match BlueprintBuilder::new_based_on( + &parent, + Generation::new(), + &policy, + "test", + ) { Ok(_) => panic!("unexpected success"), Err(err) => assert!( err.to_string().contains("duplicate service vNIC MAC"), diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index cbdcfd80c0..7973157068 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -14,8 +14,8 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::Policy; use 
nexus_types::external_api::views::SledProvisionState; use nexus_types::inventory::Collection; -use slog::warn; -use slog::{info, Logger}; +use omicron_common::api::external::Generation; +use slog::{info, warn, Logger}; use std::collections::BTreeMap; use std::collections::BTreeSet; use uuid::Uuid; @@ -40,14 +40,19 @@ impl<'a> Planner<'a> { pub fn new_based_on( log: Logger, parent_blueprint: &'a Blueprint, + internal_dns_version: Generation, policy: &'a Policy, creator: &str, // NOTE: Right now, we just assume that this is the latest inventory // collection. See the comment on the corresponding field in `Planner`. inventory: &'a Collection, ) -> anyhow::Result> { - let blueprint = - BlueprintBuilder::new_based_on(parent_blueprint, policy, creator)?; + let blueprint = BlueprintBuilder::new_based_on( + parent_blueprint, + internal_dns_version, + policy, + creator, + )?; Ok(Planner { log, policy, blueprint, inventory }) } @@ -319,6 +324,9 @@ mod test { fn test_basic_add_sled() { let logctx = test_setup_log("planner_basic_add_sled"); + // For our purposes, we don't care about the internal DNS generation. + let internal_dns_version = Generation::new(); + // Use our example inventory collection. let (mut collection, mut policy) = example(); @@ -326,6 +334,7 @@ mod test { // because there's a separate test for that. let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + internal_dns_version, &policy, "the_test", ) @@ -338,6 +347,7 @@ mod test { let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, + internal_dns_version, &policy, "no-op?", &collection, @@ -346,7 +356,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff(&blueprint2); + let diff = blueprint1.diff_sleds(&blueprint2); println!("1 -> 2 (expected no changes):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -362,6 +372,7 @@ mod test { let blueprint3 = Planner::new_based_on( logctx.log.clone(), &blueprint2, + internal_dns_version, &policy, "test: add NTP?", &collection, @@ -370,7 +381,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint2.diff(&blueprint3); + let diff = blueprint2.diff_sleds(&blueprint3); println!("2 -> 3 (expect new NTP zone on new sled):\n{}", diff); let sleds = diff.sleds_added().collect::>(); let (sled_id, sled_zones) = sleds[0]; @@ -394,6 +405,7 @@ mod test { let blueprint4 = Planner::new_based_on( logctx.log.clone(), &blueprint3, + internal_dns_version, &policy, "test: add nothing more", &collection, @@ -401,7 +413,7 @@ mod test { .expect("failed to create planner") .plan() .expect("failed to plan"); - let diff = blueprint3.diff(&blueprint4); + let diff = blueprint3.diff_sleds(&blueprint4); println!("3 -> 4 (expected no changes):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -430,6 +442,7 @@ mod test { let blueprint5 = Planner::new_based_on( logctx.log.clone(), &blueprint3, + internal_dns_version, &policy, "test: add Crucible zones?", &collection, @@ -438,7 +451,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint3.diff(&blueprint5); + let diff = blueprint3.diff_sleds(&blueprint5); println!("3 -> 5 (expect Crucible zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -465,6 +478,7 @@ mod test { let blueprint6 = Planner::new_based_on( logctx.log.clone(), &blueprint5, + internal_dns_version, &policy, "test: no-op?", &collection, @@ 
-473,7 +487,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint5.diff(&blueprint6); + let diff = blueprint5.diff_sleds(&blueprint6); println!("5 -> 6 (expect no changes):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -489,6 +503,9 @@ mod test { fn test_add_multiple_nexus_to_one_sled() { let logctx = test_setup_log("planner_add_multiple_nexus_to_one_sled"); + // For our purposes, we don't care about the internal DNS generation. + let internal_dns_version = Generation::new(); + // Use our example inventory collection as a starting point, but strip // it down to just one sled. let (sled_id, collection, mut policy) = { @@ -510,6 +527,7 @@ mod test { // Build the initial blueprint. let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + internal_dns_version, &policy, "the_test", ) @@ -536,6 +554,7 @@ mod test { let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, + internal_dns_version, &policy, "add more Nexus", &collection, @@ -544,7 +563,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff(&blueprint2); + let diff = blueprint1.diff_sleds(&blueprint2); println!("1 -> 2 (added additional Nexus zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -579,6 +598,7 @@ mod test { // Build the initial blueprint. let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "the_test", ) @@ -602,6 +622,7 @@ mod test { let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, + Generation::new(), &policy, "add more Nexus", &collection, @@ -610,7 +631,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff(&blueprint2); + let diff = blueprint1.diff_sleds(&blueprint2); println!("1 -> 2 (added additional Nexus zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -658,6 +679,7 @@ mod test { // Build the initial blueprint. 
let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "the_test", ) @@ -689,6 +711,7 @@ mod test { let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, + Generation::new(), &policy, "add more Nexus", &collection, @@ -697,7 +720,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff(&blueprint2); + let diff = blueprint1.diff_sleds(&blueprint2); println!("1 -> 2 (added additional Nexus zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 84d4cef212..32797facbf 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -19,6 +19,7 @@ use tokio::sync::watch; pub struct BlueprintExecutor { datastore: Arc, rx_blueprint: watch::Receiver>>, + nexus_label: String, } impl BlueprintExecutor { @@ -27,8 +28,9 @@ impl BlueprintExecutor { rx_blueprint: watch::Receiver< Option>, >, + nexus_label: String, ) -> BlueprintExecutor { - BlueprintExecutor { datastore, rx_blueprint } + BlueprintExecutor { datastore, rx_blueprint, nexus_label } } } @@ -65,6 +67,7 @@ impl BackgroundTask for BlueprintExecutor { opctx, &self.datastore, blueprint, + &self.nexus_label, ) .await; @@ -119,6 +122,7 @@ mod test { fn create_blueprint( omicron_zones: BTreeMap, + internal_dns_version: Generation, ) -> (BlueprintTarget, Blueprint) { let id = Uuid::new_v4(); ( @@ -132,6 +136,7 @@ mod test { omicron_zones, zones_in_service: BTreeSet::new(), parent_blueprint_id: None, + internal_dns_version, time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), @@ -185,7 +190,11 @@ mod test { } let (blueprint_tx, blueprint_rx) = watch::channel(None); - let mut task = BlueprintExecutor::new(datastore.clone(), blueprint_rx); + let mut task = BlueprintExecutor::new( + datastore.clone(), + blueprint_rx, + String::from("test-suite"), + ); // Now we're ready. // @@ -196,7 +205,8 @@ mod test { // With a target blueprint having no zones, the task should trivially // complete and report a successful (empty) summary. - let blueprint = Arc::new(create_blueprint(BTreeMap::new())); + let generation = Generation::new(); + let blueprint = Arc::new(create_blueprint(BTreeMap::new(), generation)); blueprint_tx.send(Some(blueprint)).unwrap(); let value = task.activate(&opctx).await; println!("activating with no zones: {:?}", value); @@ -204,32 +214,35 @@ mod test { // Create a non-empty blueprint describing two servers and verify that // the task correctly winds up making requests to both of them and - // reporting success. We reuse the same `OmicronZonesConfig` in - // constructing the blueprint because the details don't matter for this - // test. - let zones = OmicronZonesConfig { - generation: Generation::new(), - zones: vec![OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()) - .parse() - .unwrap(), + // reporting success. 
+ fn make_zones() -> OmicronZonesConfig { + OmicronZonesConfig { + generation: Generation::new(), + zones: vec![OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalDns { + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .unwrap(), + }, + dns_address: "oh-hello-internal-dns".into(), + gz_address: "::1".parse().unwrap(), + gz_address_index: 0, + http_address: "some-ipv6-address".into(), }, - dns_address: "oh-hello-internal-dns".into(), - gz_address: "::1".parse().unwrap(), - gz_address_index: 0, - http_address: "some-ipv6-address".into(), - }, - }], - }; - - let mut blueprint = create_blueprint(BTreeMap::from([ - (sled_id1, zones.clone()), - (sled_id2, zones.clone()), - ])); + }], + } + } + let generation = generation.next(); + let mut blueprint = create_blueprint( + BTreeMap::from([ + (sled_id1, make_zones()), + (sled_id2, make_zones()), + ]), + generation, + ); blueprint_tx.send(Some(Arc::new(blueprint.clone()))).unwrap(); @@ -255,6 +268,8 @@ mod test { // Now, disable the target and make sure that we _don't_ invoke the sled // agent. It's enough to just not set expectations. + blueprint.1.internal_dns_version = + blueprint.1.internal_dns_version.next(); blueprint.0.enabled = false; blueprint_tx.send(Some(Arc::new(blueprint.clone()))).unwrap(); let value = task.activate(&opctx).await; diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/blueprint_load.rs index c34d2ab103..8886df81cd 100644 --- a/nexus/src/app/background/blueprint_load.rs +++ b/nexus/src/app/background/blueprint_load.rs @@ -184,6 +184,7 @@ mod test { use nexus_inventory::now_db_precision; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{Blueprint, BlueprintTarget}; + use omicron_common::api::external::Generation; use serde::Deserialize; use std::collections::{BTreeMap, BTreeSet}; use uuid::Uuid; @@ -206,6 +207,7 @@ mod test { omicron_zones: BTreeMap::new(), zones_in_service: BTreeSet::new(), parent_blueprint_id, + internal_dns_version: Generation::new(), time_created: now_db_precision(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 27e58a298c..846051a068 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -228,6 +228,7 @@ impl BackgroundTasks { let blueprint_executor = blueprint_execution::BlueprintExecutor::new( datastore.clone(), rx_blueprint.clone(), + nexus_id.to_string(), ); let task_blueprint_executor = driver.register( String::from("blueprint_executor"), @@ -628,7 +629,7 @@ pub mod test { ) { let conn = datastore.pool_connection_for_tests().await.unwrap(); info!(opctx.log, "writing DNS update..."); - datastore.dns_update(opctx, &conn, update).await.unwrap(); + datastore.dns_update_incremental(opctx, &conn, update).await.unwrap(); } pub(crate) async fn write_test_dns_generation( diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index b8cb6deabf..61ce803d13 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -4,8 +4,10 @@ //! 
Configuration of the deployment system +use nexus_db_model::DnsGroup; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::SQL_BATCH_SIZE; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; use nexus_deployment::planner::Planner; @@ -26,6 +28,7 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; +use omicron_common::api::external::Generation; use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; @@ -33,19 +36,15 @@ use omicron_common::api::external::LookupType; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::num::NonZeroU32; use std::str::FromStr; use uuid::Uuid; -/// "limit" used in SQL queries that paginate through all sleds, zpools, etc. -// unsafe: `new_unchecked` is only unsound if the argument is 0. -const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; - /// Common structure for collecting information that the planner needs struct PlanningContext { policy: Policy, creator: String, inventory: Option, + internal_dns_version: Generation, } impl super::Nexus { @@ -118,18 +117,7 @@ impl super::Nexus { let creator = self.id.to_string(); let datastore = self.datastore(); - let sled_rows = { - let mut all_sleds = Vec::new(); - let mut paginator = Paginator::new(SQL_BATCH_SIZE); - while let Some(p) = paginator.next() { - let batch = - datastore.sled_list(opctx, &p.current_pagparams()).await?; - paginator = - p.found_batch(&batch, &|s: &nexus_db_model::Sled| s.id()); - all_sleds.extend(batch); - } - all_sleds - }; + let sled_rows = datastore.sled_list_all_batched(opctx).await?; let mut zpools_by_sled_id = { let mut zpools = BTreeMap::new(); @@ -222,6 +210,16 @@ impl super::Nexus { "fetching latest inventory collection for blueprint planner", )?; + // Fetch the current internal DNS version. This could be made part of + // inventory, but it's enough of a one-off that there's no particular + // advantage to doing that work now. 
+ let dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::Internal) + .await + .internal_context( + "fetching internal DNS version for blueprint planning", + )?; + Ok(PlanningContext { creator, policy: Policy { @@ -230,6 +228,7 @@ impl super::Nexus { target_nexus_zone_count: NEXUS_REDUNDANCY, }, inventory, + internal_dns_version: *dns_version.version, }) } @@ -253,6 +252,7 @@ impl super::Nexus { let planning_context = self.blueprint_planning_context(opctx).await?; let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, + planning_context.internal_dns_version, &planning_context.policy, &planning_context.creator, ) @@ -287,6 +287,7 @@ impl super::Nexus { let planner = Planner::new_based_on( opctx.log.clone(), &parent_blueprint, + planning_context.internal_dns_version, &planning_context.policy, &planning_context.creator, &inventory, diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 2a37721bb0..271025f7a7 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -781,25 +781,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { ); } - pub async fn scrimlet_dns_setup(&mut self) { - let sled_agent = self - .sled_agent - .as_ref() - .expect("Cannot set up scrimlet DNS without sled agent"); - - let sa = match sled_agent.http_server.local_addr() { - SocketAddr::V6(sa) => sa, - SocketAddr::V4(_) => panic!("expected SocketAddrV6 for sled agent"), - }; - - for loc in [SwitchLocation::Switch0, SwitchLocation::Switch1] { - self.rack_init_builder - .internal_dns_config - .host_scrimlet(loc, sa) - .expect("add switch0 scrimlet dns entry"); - } - } - // Set up an external DNS server. pub async fn start_external_dns(&mut self) { let log = self.logctx.log.new(o!("component" => "external_dns_server")); @@ -1063,10 +1044,6 @@ async fn setup_with_config_impl( "start_crucible_pantry", Box::new(|builder| builder.start_crucible_pantry().boxed()), ), - ( - "scrimlet_dns_setup", - Box::new(|builder| builder.scrimlet_dns_setup().boxed()), - ), ( "populate_internal_dns", Box::new(|builder| builder.populate_internal_dns().boxed()), diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 490097d46d..2e683878be 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -135,6 +135,10 @@ pub struct Blueprint { /// which blueprint this blueprint is based on pub parent_blueprint_id: Option, + /// internal DNS version when this blueprint was created + // See blueprint generation for more on this. 
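[Editor's note] Pulling the pieces together: the planner snapshots the internal DNS generation into the blueprint (the new `internal_dns_version` field above), and blueprint execution can then use that snapshot as the `old_version` precondition for `dns_update_from_version()` when it writes DNS changes derived from the blueprint. The following is a schematic sketch of that flow with stand-in types; the real execution path also short-circuits when the computed DNS config already matches what is stored, which this sketch omits.

```rust
// Simplified stand-ins; not the omicron-nexus types.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Generation(u64);

impl Generation {
    fn next(self) -> Generation {
        Generation(self.0 + 1)
    }
}

struct Blueprint {
    internal_dns_version: Generation,
    // ... zones, metadata, etc.
}

struct DnsState {
    version: Generation,
}

fn plan(current_dns: &DnsState) -> Blueprint {
    // Planning records the DNS generation it observed.
    Blueprint { internal_dns_version: current_dns.version }
}

fn execute(bp: &Blueprint, dns: &mut DnsState) -> Result<(), String> {
    // Conditional write: only apply if DNS hasn't changed since planning.
    if dns.version != bp.internal_dns_version {
        return Err(format!(
            "blueprint is stale: planned against DNS version {:?}, \
             current version is {:?}",
            bp.internal_dns_version, dns.version
        ));
    }
    dns.version = dns.version.next();
    Ok(())
}

fn main() {
    let mut dns = DnsState { version: Generation(1) };
    let bp = plan(&dns);
    // Executing right away succeeds and bumps the DNS generation.
    execute(&bp, &mut dns).unwrap();
    // In this simplified sketch, re-running the same blueprint is rejected
    // once the generation has moved on (the real executor first checks
    // whether anything actually needs to change).
    assert!(execute(&bp, &mut dns).is_err());
}
```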
+ pub internal_dns_version: Generation, + /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, /// identity of the component that generated the blueprint (for debugging) @@ -161,8 +165,11 @@ impl Blueprint { self.omicron_zones.keys().copied() } - /// Summarize the difference between two blueprints - pub fn diff<'a>(&'a self, other: &'a Blueprint) -> OmicronZonesDiff<'a> { + /// Summarize the difference between sleds and zones between two blueprints + pub fn diff_sleds<'a>( + &'a self, + other: &'a Blueprint, + ) -> OmicronZonesDiff<'a> { OmicronZonesDiff { before_label: format!("blueprint {}", self.id), before_zones: self.omicron_zones.clone(), @@ -173,14 +180,15 @@ impl Blueprint { } } - /// Summarize the difference between a collection and a blueprint + /// Summarize the differences in sleds and zones between a collection and a + /// blueprint /// /// This gives an idea about what would change about a running system if one /// were to execute the blueprint. /// /// Note that collections do not currently include information about what /// zones are in-service, so the caller must provide that information. - pub fn diff_from_collection<'a>( + pub fn diff_sleds_from_collection<'a>( &'a self, collection: &'a Collection, before_zones_in_service: &'a BTreeSet, @@ -212,6 +220,8 @@ pub struct BlueprintMetadata { /// which blueprint this blueprint is based on pub parent_blueprint_id: Option, + /// internal DNS version when this blueprint was created + pub internal_dns_version: Generation, /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 50e8b380b3..71e8e64d97 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -134,6 +134,14 @@ impl Collection { ) -> impl Iterator { self.omicron_zones.values().flat_map(|z| z.zones.zones.iter()) } + + /// Iterate over the sled ids of sleds identified as Scrimlets + pub fn scrimlets(&self) -> impl Iterator + '_ { + self.sled_agents + .iter() + .filter(|(_, inventory)| inventory.sled_role == SledRole::Scrimlet) + .map(|(sled_id, _)| *sled_id) + } } /// A unique baseboard id found during a collection diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index b8b45aa08e..53a53fb219 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2124,6 +2124,14 @@ "type": "string", "format": "uuid" }, + "internal_dns_version": { + "description": "internal DNS version when this blueprint was created", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, "omicron_zones": { "description": "mapping: sled id -> zones deployed on each sled A sled is considered part of the control plane cluster iff it has an entry in this map.", "type": "object", @@ -2156,6 +2164,7 @@ "comment", "creator", "id", + "internal_dns_version", "omicron_zones", "time_created", "zones_in_service" @@ -2178,6 +2187,14 @@ "type": "string", "format": "uuid" }, + "internal_dns_version": { + "description": "internal DNS version when this blueprint was created", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, "parent_blueprint_id": { "nullable": true, "description": "which blueprint this blueprint is based on", @@ -2194,6 +2211,7 @@ "comment", "creator", "id", + "internal_dns_version", "time_created" ] }, diff --git a/schema/crdb/36.0.0/up1.sql b/schema/crdb/36.0.0/up1.sql new file mode 100644 index 0000000000..38e3843bfc --- /dev/null +++ 
@@ -0,0 +1,8 @@
+-- Add the "internal_dns_version" column to the "blueprint" table.
+-- This query will end up setting the internal DNS version for any existing
+-- blueprints to 1. This is always safe because it's the smallest possible
+-- value and if a value is too small, the end result is simply needing to
+-- regenerate the blueprint in order to be able to execute it. (On the other
+-- hand, using a value that's too large could cause corruption.)
+ALTER TABLE omicron.public.blueprint
+    ADD COLUMN IF NOT EXISTS internal_dns_version INT8 NOT NULL DEFAULT 1;
diff --git a/schema/crdb/36.0.0/up2.sql b/schema/crdb/36.0.0/up2.sql
new file mode 100644
index 0000000000..7c0082fbb0
--- /dev/null
+++ b/schema/crdb/36.0.0/up2.sql
@@ -0,0 +1,2 @@
+ALTER TABLE omicron.public.blueprint
+    ALTER COLUMN internal_dns_version DROP DEFAULT;
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 6c82b63e6e..87a22d1adc 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -3096,7 +3096,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.blueprint (
     -- These fields are for debugging only.
     time_created TIMESTAMPTZ NOT NULL,
     creator TEXT NOT NULL,
-    comment TEXT NOT NULL
+    comment TEXT NOT NULL,
+
+    -- identifies the latest internal DNS version when blueprint planning began
+    internal_dns_version INT8 NOT NULL
 );
 
 -- table describing both the current and historical target blueprints of the
@@ -3515,7 +3518,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    ( TRUE, NOW(), NOW(), '35.0.0', NULL)
+    ( TRUE, NOW(), NOW(), '36.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs
index 0b633c2057..77fd8a39de 100644
--- a/sled-agent/src/rack_setup/plan/service.rs
+++ b/sled-agent/src/rack_setup/plan/service.rs
@@ -19,7 +19,6 @@ use omicron_common::address::{
     RSS_RESERVED_ADDRESSES, SLED_PREFIX,
 };
 use omicron_common::api::external::{MacAddr, Vni};
-use omicron_common::api::internal::shared::SwitchLocation;
 use omicron_common::api::internal::shared::{
     NetworkInterface, NetworkInterfaceKind, SourceNatConfig,
 };
@@ -295,36 +294,17 @@ impl Plan {
                 "No scrimlets observed".to_string(),
             ));
         }
-        for (i, sled) in scrimlets.iter().enumerate() {
+        for sled in scrimlets.iter() {
             let address = get_switch_zone_address(sled.subnet);
-            let zone =
-                dns_builder.host_dendrite(sled.sled_id, address).unwrap();
             dns_builder
-                .service_backend_zone(
-                    ServiceName::Dendrite,
-                    &zone,
+                .host_zone_switch(
+                    sled.sled_id,
+                    address,
                     DENDRITE_PORT,
-                )
-                .unwrap();
-            dns_builder
-                .service_backend_zone(
-                    ServiceName::ManagementGatewayService,
-                    &zone,
                     MGS_PORT,
+                    MGD_PORT,
                 )
                 .unwrap();
-            dns_builder
-                .service_backend_zone(ServiceName::Mgd, &zone, MGD_PORT)
-                .unwrap();
-
-            // TODO only works for single rack
-            let sled_address = get_sled_address(sled.subnet);
-            let switch_location = if i == 0 {
-                SwitchLocation::Switch0
-            } else {
-                SwitchLocation::Switch1
-            };
-            dns_builder.host_scrimlet(switch_location, sled_address).unwrap();
         }
 
         // We'll stripe most services across all available Sleds, round-robin
@@ -354,11 +334,11 @@ impl Plan {
 
             let dns_address = SocketAddrV6::new(ip, DNS_PORT, 0, 0);
             let id = Uuid::new_v4();
-            let zone = dns_builder.host_zone(id, ip).unwrap();
             dns_builder
-                .service_backend_zone(
+                .host_zone_with_one_backend(
+                    id,
+                    ip,
                     ServiceName::InternalDns,
-                    &zone,
                     DNS_HTTP_PORT,
                 )
                 .unwrap();
@@ -391,9 +371,13 @@ impl Plan {
             let ip = sled.addr_alloc.next().expect("Not enough addrs");
             let port = omicron_common::address::COCKROACH_PORT;
             let address = SocketAddrV6::new(ip, port, 0, 0);
-            let zone = dns_builder.host_zone(id, ip).unwrap();
             dns_builder
-                .service_backend_zone(ServiceName::Cockroach, &zone, port)
+                .host_zone_with_one_backend(
+                    id,
+                    ip,
+                    ServiceName::Cockroach,
+                    port,
+                )
                 .unwrap();
             let dataset_name =
                 sled.alloc_from_u2_zpool(DatasetKind::CockroachDb)?;
@@ -427,11 +411,11 @@ impl Plan {
             let internal_ip = sled.addr_alloc.next().expect("Not enough addrs");
             let http_port = omicron_common::address::DNS_HTTP_PORT;
             let http_address = SocketAddrV6::new(internal_ip, http_port, 0, 0);
-            let zone = dns_builder.host_zone(id, internal_ip).unwrap();
             dns_builder
-                .service_backend_zone(
+                .host_zone_with_one_backend(
+                    id,
+                    internal_ip,
                     ServiceName::ExternalDns,
-                    &zone,
                     http_port,
                 )
                 .unwrap();
@@ -463,11 +447,11 @@ impl Plan {
             };
             let id = Uuid::new_v4();
             let address = sled.addr_alloc.next().expect("Not enough addrs");
-            let zone = dns_builder.host_zone(id, address).unwrap();
             dns_builder
-                .service_backend_zone(
+                .host_zone_with_one_backend(
+                    id,
+                    address,
                     ServiceName::Nexus,
-                    &zone,
                     omicron_common::address::NEXUS_INTERNAL_PORT,
                 )
                 .unwrap();
@@ -506,11 +490,11 @@ impl Plan {
             };
             let id = Uuid::new_v4();
             let address = sled.addr_alloc.next().expect("Not enough addrs");
-            let zone = dns_builder.host_zone(id, address).unwrap();
             dns_builder
-                .service_backend_zone(
+                .host_zone_with_one_backend(
+                    id,
+                    address,
                     ServiceName::Oximeter,
-                    &zone,
                     omicron_common::address::OXIMETER_PORT,
                 )
                 .unwrap();
@@ -540,9 +524,13 @@ impl Plan {
             let ip = sled.addr_alloc.next().expect("Not enough addrs");
             let port = omicron_common::address::CLICKHOUSE_PORT;
             let address = SocketAddrV6::new(ip, port, 0, 0);
-            let zone = dns_builder.host_zone(id, ip).unwrap();
             dns_builder
-                .service_backend_zone(ServiceName::Clickhouse, &zone, port)
+                .host_zone_with_one_backend(
+                    id,
+                    ip,
+                    ServiceName::Clickhouse,
+                    port,
+                )
                 .unwrap();
             let dataset_name =
                 sled.alloc_from_u2_zpool(DatasetKind::Clickhouse)?;
@@ -572,11 +560,11 @@ impl Plan {
             let ip = sled.addr_alloc.next().expect("Not enough addrs");
             let port = omicron_common::address::CLICKHOUSE_KEEPER_PORT;
             let address = SocketAddrV6::new(ip, port, 0, 0);
-            let zone = dns_builder.host_zone(id, ip).unwrap();
             dns_builder
-                .service_backend_zone(
+                .host_zone_with_one_backend(
+                    id,
+                    ip,
                     ServiceName::ClickhouseKeeper,
-                    &zone,
                     port,
                 )
                 .unwrap();
@@ -605,9 +593,13 @@ impl Plan {
             let address = sled.addr_alloc.next().expect("Not enough addrs");
             let port = omicron_common::address::CRUCIBLE_PANTRY_PORT;
             let id = Uuid::new_v4();
-            let zone = dns_builder.host_zone(id, address).unwrap();
             dns_builder
-                .service_backend_zone(ServiceName::CruciblePantry, &zone, port)
+                .host_zone_with_one_backend(
+                    id,
+                    address,
+                    ServiceName::CruciblePantry,
+                    port,
+                )
                 .unwrap();
             sled.request.zones.push(OmicronZoneConfig {
                 id,
@@ -626,11 +618,11 @@ impl Plan {
                 let port = omicron_common::address::CRUCIBLE_PORT;
                 let address = SocketAddrV6::new(ip, port, 0, 0);
                 let id = Uuid::new_v4();
-                let zone = dns_builder.host_zone(id, ip).unwrap();
                 dns_builder
-                    .service_backend_zone(
+                    .host_zone_with_one_backend(
+                        id,
+                        ip,
                         ServiceName::Crucible(id),
-                        &zone,
                         port,
                     )
                     .unwrap();
@@ -653,7 +645,6 @@ impl Plan {
         for (idx, sled) in sled_info.iter_mut().enumerate() {
             let id = Uuid::new_v4();
             let address = sled.addr_alloc.next().expect("Not enough addrs");
-            let zone = dns_builder.host_zone(id, address).unwrap();
             let ntp_address = SocketAddrV6::new(address, NTP_PORT, 0, 0);
 
             let (zone_type, svcname) = if idx < BOUNDARY_NTP_COUNT {
@@ -683,7 +674,9 @@ impl Plan {
                 )
             };
 
-            dns_builder.service_backend_zone(svcname, &zone, NTP_PORT).unwrap();
+            dns_builder
+                .host_zone_with_one_backend(id, address, svcname, NTP_PORT)
+                .unwrap();
 
             sled.request.zones.push(OmicronZoneConfig {
                 id,