diff --git a/Cargo.lock b/Cargo.lock index c00f0c17f6..31d3529588 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4245,6 +4245,7 @@ dependencies = [ "chrono", "futures", "httptest", + "internal-dns", "nexus-db-model", "nexus-db-queries", "nexus-test-utils", @@ -4258,6 +4259,7 @@ dependencies = [ "reqwest", "sled-agent-client", "slog", + "slog-error-chain", "tokio", "uuid", ] diff --git a/clients/dns-service-client/src/lib.rs b/clients/dns-service-client/src/lib.rs index e437f1a7f6..51bbf6e360 100644 --- a/clients/dns-service-client/src/lib.rs +++ b/clients/dns-service-client/src/lib.rs @@ -5,7 +5,7 @@ progenitor::generate_api!( spec = "../../openapi/dns-server.json", inner_type = slog::Logger, - derives = [schemars::JsonSchema, Eq, PartialEq], + derives = [schemars::JsonSchema, Clone, Eq, PartialEq], pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { slog::debug!(log, "client request"; "method" => %request.method(), diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index cdc929d89b..17b4826f8c 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -679,6 +679,12 @@ impl From<&Generation> for i64 { } } +impl From for u64 { + fn from(g: Generation) -> Self { + g.0 + } +} + impl From for Generation { fn from(value: u32) -> Self { Generation(u64::from(value)) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index f00c05f1ec..e7ca4323d9 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -844,7 +844,7 @@ async fn cmd_nexus_blueprints_diff( let b2 = client.blueprint_view(&args.blueprint2_id).await.with_context( || format!("fetching blueprint {}", args.blueprint2_id), )?; - println!("{}", b1.diff(&b2)); + println!("{}", b1.diff_sleds(&b2)); Ok(()) } diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index bf1d9b763b..5eee34bd51 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -63,9 +63,9 @@ use crate::names::{ServiceName, DNS_ZONE}; use anyhow::{anyhow, ensure}; use dns_service_client::types::{DnsConfigParams, DnsConfigZone, DnsRecord}; -use omicron_common::api::internal::shared::SwitchLocation; +use omicron_common::api::external::Generation; use std::collections::BTreeMap; -use std::net::{Ipv6Addr, SocketAddrV6}; +use std::net::Ipv6Addr; use uuid::Uuid; /// Zones that can be referenced within the internal DNS system. @@ -147,8 +147,6 @@ pub struct DnsConfigBuilder { /// network sleds: BTreeMap, - scrimlets: BTreeMap, - /// set of hosts of type "zone" that have been configured so far, mapping /// each zone's unique uuid to its sole IPv6 address on the control plane /// network @@ -163,6 +161,9 @@ pub struct DnsConfigBuilder { /// similar to service_instances_zones, but for services that run on sleds service_instances_sleds: BTreeMap>, + + /// generation number for this config + generation: Generation, } /// Describes a host of type "sled" in the control plane DNS zone @@ -192,16 +193,17 @@ impl DnsConfigBuilder { DnsConfigBuilder { sleds: BTreeMap::new(), zones: BTreeMap::new(), - scrimlets: BTreeMap::new(), service_instances_zones: BTreeMap::new(), service_instances_sleds: BTreeMap::new(), + generation: Generation::new(), } } /// Add a new host of type "sled" to the configuration /// - /// Returns a [`Sled`] that can be used with [`Self::service_backend_sled()`] to - /// specify that this sled is a backend for some higher-level service. 
+ /// Returns a [`Sled`] that can be used with + /// [`Self::service_backend_sled()`] to specify that this sled is a backend + /// for some higher-level service. /// /// # Errors /// @@ -223,19 +225,11 @@ impl DnsConfigBuilder { } } - pub fn host_scrimlet( - &mut self, - switch_location: SwitchLocation, - addr: SocketAddrV6, - ) -> anyhow::Result<()> { - self.scrimlets.insert(switch_location, addr); - Ok(()) - } - /// Add a new dendrite host of type "zone" to the configuration /// - /// Returns a [`Zone`] that can be used with [`Self::service_backend_zone()`] to - /// specify that this zone is a backend for some higher-level service. + /// Returns a [`Zone`] that can be used with + /// [`Self::service_backend_zone()`] to specify that this zone is a backend + /// for some higher-level service. /// /// # Errors /// @@ -251,8 +245,9 @@ impl DnsConfigBuilder { /// Add a new host of type "zone" to the configuration /// - /// Returns a [`Zone`] that can be used with [`Self::service_backend_zone()`] to - /// specify that this zone is a backend for some higher-level service. + /// Returns a [`Zone`] that can be used with + /// [`Self::service_backend_zone()`] to specify that this zone is a backend + /// for some higher-level service. /// /// # Errors /// @@ -363,6 +358,52 @@ impl DnsConfigBuilder { } } + /// Higher-level shorthand for adding a zone with a single backend service + /// + /// # Errors + /// + /// This function fails only if the given zone has already been added to the + /// configuration. + pub fn host_zone_with_one_backend( + &mut self, + zone_id: Uuid, + addr: Ipv6Addr, + service: ServiceName, + port: u16, + ) -> anyhow::Result<()> { + let zone = self.host_zone(zone_id, addr)?; + self.service_backend_zone(service, &zone, port) + } + + /// Higher-level shorthand for adding a "switch" zone with its usual set of + /// backend services + /// + /// # Errors + /// + /// This function fails only if the given zone has already been added to the + /// configuration. + pub fn host_zone_switch( + &mut self, + sled_id: Uuid, + switch_zone_ip: Ipv6Addr, + dendrite_port: u16, + mgs_port: u16, + mgd_port: u16, + ) -> anyhow::Result<()> { + let zone = self.host_dendrite(sled_id, switch_zone_ip)?; + self.service_backend_zone(ServiceName::Dendrite, &zone, dendrite_port)?; + self.service_backend_zone( + ServiceName::ManagementGatewayService, + &zone, + mgs_port, + )?; + self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port) + } + + pub fn generation(&mut self, generation: Generation) { + self.generation = generation; + } + /// Construct a complete [`DnsConfigParams`] (suitable for propagating to /// our DNS servers) for the control plane DNS zone described up to this /// point @@ -378,23 +419,6 @@ impl DnsConfigBuilder { (zone.dns_name(), vec![DnsRecord::Aaaa(zone_ip)]) }); - let scrimlet_srv_records = - self.scrimlets.clone().into_iter().map(|(location, addr)| { - let srv = DnsRecord::Srv(dns_service_client::types::Srv { - prio: 0, - weight: 0, - port: addr.port(), - target: format!("{location}.scrimlet.{}", DNS_ZONE), - }); - (ServiceName::Scrimlet(location).dns_name(), vec![srv]) - }); - - let scrimlet_aaaa_records = - self.scrimlets.into_iter().map(|(location, addr)| { - let aaaa = DnsRecord::Aaaa(*addr.ip()); - (format!("{location}.scrimlet"), vec![aaaa]) - }); - // Assemble the set of SRV records, which implicitly point back at // zones' AAAA records. 
let srv_records_zones = self.service_instances_zones.into_iter().map( @@ -439,12 +463,10 @@ impl DnsConfigBuilder { .chain(zone_records) .chain(srv_records_sleds) .chain(srv_records_zones) - .chain(scrimlet_aaaa_records) - .chain(scrimlet_srv_records) .collect(); DnsConfigParams { - generation: 1, + generation: u64::from(self.generation), time_created: chrono::Utc::now(), zones: vec![DnsConfigZone { zone_name: DNS_ZONE.to_owned(), diff --git a/nexus/blueprint-execution/Cargo.toml b/nexus/blueprint-execution/Cargo.toml index 11d8003599..dbe3cc0758 100644 --- a/nexus/blueprint-execution/Cargo.toml +++ b/nexus/blueprint-execution/Cargo.toml @@ -9,11 +9,15 @@ omicron-rpaths.workspace = true [dependencies] anyhow.workspace = true futures.workspace = true +internal-dns.workspace = true +nexus-db-model.workspace = true nexus-db-queries.workspace = true nexus-types.workspace = true +omicron-common.workspace = true reqwest.workspace = true sled-agent-client.workspace = true slog.workspace = true +slog-error-chain.workspace = true uuid.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. This is needed @@ -26,9 +30,7 @@ omicron-workspace-hack.workspace = true [dev-dependencies] chrono.workspace = true httptest.workspace = true -nexus-db-model.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true -omicron-common.workspace = true omicron-nexus.workspace = true tokio.workspace = true diff --git a/nexus/blueprint-execution/src/dns.rs b/nexus/blueprint-execution/src/dns.rs new file mode 100644 index 0000000000..9d7a968f92 --- /dev/null +++ b/nexus/blueprint-execution/src/dns.rs @@ -0,0 +1,296 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Propagates internal DNS changes in a given blueprint + +use crate::Sled; +use anyhow::ensure; +use internal_dns::DnsConfigBuilder; +use internal_dns::ServiceName; +use nexus_db_model::DnsGroup; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::OmicronZoneType; +use nexus_types::internal_api::params::DnsConfigParams; +use nexus_types::internal_api::params::DnsRecord; +use omicron_common::address::get_switch_zone_address; +use omicron_common::address::CLICKHOUSE_KEEPER_PORT; +use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::COCKROACH_PORT; +use omicron_common::address::CRUCIBLE_PANTRY_PORT; +use omicron_common::address::CRUCIBLE_PORT; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::DNS_HTTP_PORT; +use omicron_common::address::MGD_PORT; +use omicron_common::address::MGS_PORT; +use omicron_common::address::NEXUS_INTERNAL_PORT; +use omicron_common::address::NTP_PORT; +use omicron_common::address::OXIMETER_PORT; +use omicron_common::api::external::Error; +use omicron_common::api::external::Generation; +use omicron_common::api::external::InternalContext; +use slog::{debug, info}; +use std::collections::BTreeMap; +use std::collections::HashMap; +use uuid::Uuid; + +pub async fn deploy_dns( + opctx: &OpContext, + datastore: &DataStore, + creator: S, + blueprint: &Blueprint, + sleds_by_id: &BTreeMap, +) -> Result<(), Error> +where + String: From, +{ + // First, fetch the current DNS config. 
+    let dns_config_current = datastore
+        .dns_config_read(opctx, DnsGroup::Internal)
+        .await
+        .internal_context("reading current DNS")?;
+
+    // We could check here that the DNS version we found isn't newer than when
+    // the blueprint was generated. But we have to check later when we try to
+    // update the database anyway. And we're not wasting much effort allowing
+    // this to proceed for now. This way, we have only one code path for this
+    // and we know it's being hit when we exercise this condition.
+
+    // Next, construct the DNS config represented by the blueprint.
+    let dns_config_blueprint = blueprint_dns_config(blueprint, sleds_by_id);
+
+    // Diff these configurations and use the result to prepare an update.
+    let comment = format!("blueprint {} ({})", blueprint.id, blueprint.comment);
+    let mut update = DnsVersionUpdateBuilder::new(
+        DnsGroup::Internal,
+        comment,
+        String::from(creator),
+    );
+
+    let diff = DnsDiff::new(&dns_config_current, &dns_config_blueprint)
+        .map_err(|e| Error::internal_error(&format!("{:#}", e)))?;
+    if diff.is_empty() {
+        info!(opctx.log, "blueprint execution: DNS: no changes");
+        return Ok(());
+    }
+
+    for (name, new_records) in diff.names_added() {
+        debug!(
+            opctx.log,
+            "blueprint dns update: adding name";
+            "name" => name,
+            "new_records" => ?new_records,
+        );
+        update.add_name(
+            name.to_string(),
+            new_records.into_iter().cloned().collect(),
+        )?;
+    }
+
+    for (name, old_records) in diff.names_removed() {
+        debug!(
+            opctx.log,
+            "blueprint dns update: removing name";
+            "name" => name,
+            "old_records" => ?old_records,
+        );
+        update.remove_name(name.to_string())?;
+    }
+
+    for (name, old_records, new_records) in diff.names_changed() {
+        debug!(
+            opctx.log,
+            "blueprint dns update: updating name";
+            "name" => name,
+            "old_records" => ?old_records,
+            "new_records" => ?new_records,
+        );
+        update.remove_name(name.to_string())?;
+        update.add_name(
+            name.to_string(),
+            new_records.into_iter().cloned().collect(),
+        )?;
+    }
+
+    info!(
+        opctx.log,
+        "blueprint execution: DNS: attempting to update from generation {}",
+        dns_config_blueprint.generation,
+    );
+    let generation_u32 = u32::try_from(dns_config_blueprint.generation)
+        .map_err(|e| {
+            Error::internal_error(&format!(
+                "internal DNS generation got too large: {}",
+                e,
+            ))
+        })?;
+    let generation =
+        nexus_db_model::Generation::from(Generation::from(generation_u32));
+    datastore.dns_update_from_version(opctx, update, generation).await
+}
+
+pub fn blueprint_dns_config(
+    blueprint: &Blueprint,
+    sleds_by_id: &BTreeMap<Uuid, Sled>,
+) -> DnsConfigParams {
+    // The DNS names configured here should match what RSS configures for the
+    // same zones. It's tricky to have RSS share the same code because it uses
+    // Sled Agent's _internal_ `OmicronZoneConfig` (and friends), whereas we're
+    // using `sled-agent-client`'s version of that type. However, the
+    // DnsConfigBuilder's interface is high-level enough that it handles most of
+    // the details.
+    let mut dns_builder = DnsConfigBuilder::new();
+
+    // The code below assumes that all zones are using the default port numbers.
+    // That should be true, as those are the only ports ever used today.
+    // In an ideal world, the correct port would be pulled out of the
+    // `OmicronZoneType` variant instead. Although that information is present,
+    // it's irritatingly non-trivial to do right now because SocketAddrs are
+    // represented as strings, so we'd need to parse all of them and handle all
+    // the errors, even though they should never happen.
+ // See oxidecomputer/omicron#4988. + for (_, omicron_zone) in blueprint.all_omicron_zones() { + let (service_name, port) = match omicron_zone.zone_type { + OmicronZoneType::BoundaryNtp { .. } => { + (ServiceName::BoundaryNtp, NTP_PORT) + } + OmicronZoneType::InternalNtp { .. } => { + (ServiceName::InternalNtp, NTP_PORT) + } + OmicronZoneType::Clickhouse { .. } => { + (ServiceName::Clickhouse, CLICKHOUSE_PORT) + } + OmicronZoneType::ClickhouseKeeper { .. } => { + (ServiceName::ClickhouseKeeper, CLICKHOUSE_KEEPER_PORT) + } + OmicronZoneType::CockroachDb { .. } => { + (ServiceName::Cockroach, COCKROACH_PORT) + } + OmicronZoneType::Nexus { .. } => { + (ServiceName::Nexus, NEXUS_INTERNAL_PORT) + } + OmicronZoneType::Crucible { .. } => { + (ServiceName::Crucible(omicron_zone.id), CRUCIBLE_PORT) + } + OmicronZoneType::CruciblePantry { .. } => { + (ServiceName::CruciblePantry, CRUCIBLE_PANTRY_PORT) + } + OmicronZoneType::Oximeter { .. } => { + (ServiceName::Oximeter, OXIMETER_PORT) + } + OmicronZoneType::ExternalDns { .. } => { + (ServiceName::ExternalDns, DNS_HTTP_PORT) + } + OmicronZoneType::InternalDns { .. } => { + (ServiceName::InternalDns, DNS_HTTP_PORT) + } + }; + + // This unwrap is safe because this function only fails if we provide + // the same zone id twice, which should not be possible here. + dns_builder + .host_zone_with_one_backend( + omicron_zone.id, + omicron_zone.underlay_address, + service_name, + port, + ) + .unwrap(); + } + + let scrimlets = sleds_by_id.values().filter(|sled| sled.is_scrimlet); + for scrimlet in scrimlets { + let sled_subnet = scrimlet.subnet(); + let switch_zone_ip = get_switch_zone_address(sled_subnet); + // unwrap(): see above. + dns_builder + .host_zone_switch( + scrimlet.id, + switch_zone_ip, + DENDRITE_PORT, + MGS_PORT, + MGD_PORT, + ) + .unwrap(); + } + + dns_builder.generation(blueprint.internal_dns_version.next()); + dns_builder.build() +} + +type DnsRecords = HashMap>; +pub struct DnsDiff<'a> { + left: &'a DnsRecords, + right: &'a DnsRecords, +} + +impl<'a> DnsDiff<'a> { + pub fn new( + left: &'a DnsConfigParams, + right: &'a DnsConfigParams, + ) -> Result, anyhow::Error> { + let left_zone = { + ensure!( + left.zones.len() == 1, + "left side of diff: expected exactly one \ + DNS zone, but found {}", + left.zones.len(), + ); + + &left.zones[0] + }; + + let right_zone = { + ensure!( + right.zones.len() == 1, + "right side of diff: expected exactly one \ + DNS zone, but found {}", + right.zones.len(), + ); + + &right.zones[0] + }; + + ensure!( + left_zone.zone_name == right_zone.zone_name, + "cannot compare DNS configuration from zones with different names: \ + {:?} vs. 
{:?}", left_zone.zone_name, right_zone.zone_name, + ); + + Ok(DnsDiff { left: &left_zone.records, right: &right_zone.records }) + } + + pub fn names_added(&self) -> impl Iterator { + self.right + .iter() + .filter(|(k, _)| !self.left.contains_key(*k)) + .map(|(k, v)| (k.as_ref(), v.as_ref())) + } + + pub fn names_removed(&self) -> impl Iterator { + self.left + .iter() + .filter(|(k, _)| !self.right.contains_key(*k)) + .map(|(k, v)| (k.as_ref(), v.as_ref())) + } + + pub fn names_changed( + &self, + ) -> impl Iterator { + self.left.iter().filter_map(|(k, v1)| match self.right.get(k) { + Some(v2) if v1 != v2 => { + Some((k.as_ref(), v1.as_ref(), v2.as_ref())) + } + _ => None, + }) + } + + pub fn is_empty(&self) -> bool { + self.names_added().next().is_none() + && self.names_removed().next().is_none() + && self.names_changed().next().is_none() + } +} diff --git a/nexus/blueprint-execution/src/lib.rs b/nexus/blueprint-execution/src/lib.rs index f7bfd7d30c..26cc4686b7 100644 --- a/nexus/blueprint-execution/src/lib.rs +++ b/nexus/blueprint-execution/src/lib.rs @@ -6,29 +6,81 @@ //! //! See `nexus_deployment` crate-level docs for background. +use anyhow::{anyhow, Context}; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; -use slog::o; +use nexus_types::identity::Asset; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use slog::info; +use slog_error_chain::InlineErrorChain; +use std::collections::BTreeMap; +use std::net::SocketAddrV6; +use uuid::Uuid; +mod dns; mod omicron_zones; +struct Sled { + id: Uuid, + sled_agent_address: SocketAddrV6, + is_scrimlet: bool, +} + +impl Sled { + pub fn subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(*self.sled_agent_address.ip()) + } +} + +impl From for Sled { + fn from(value: nexus_db_model::Sled) -> Self { + Sled { + id: value.id(), + sled_agent_address: value.address(), + is_scrimlet: value.is_scrimlet(), + } + } +} + /// Make one attempt to realize the given blueprint, meaning to take actions to /// alter the real system to match the blueprint /// /// The assumption is that callers are running this periodically or in a loop to /// deal with transient errors or changes in the underlying system state. -pub async fn realize_blueprint( +pub async fn realize_blueprint( opctx: &OpContext, datastore: &DataStore, blueprint: &Blueprint, -) -> Result<(), Vec> { - let log = opctx.log.new(o!("comment" => blueprint.comment.clone())); - omicron_zones::deploy_zones( - &log, - opctx, - datastore, - &blueprint.omicron_zones, - ) - .await + nexus_label: S, +) -> Result<(), Vec> +where + String: From, +{ + let opctx = opctx.child(BTreeMap::from([( + "comment".to_string(), + blueprint.comment.clone(), + )])); + + info!( + opctx.log, + "attempting to realize blueprint"; + "blueprint_id" => ?blueprint.id + ); + + let sleds_by_id: BTreeMap = datastore + .sled_list_all_batched(&opctx) + .await + .context("listing all sleds") + .map_err(|e| vec![e])? 
+        .into_iter()
+        .map(|db_sled| (db_sled.id(), Sled::from(db_sled)))
+        .collect();
+    omicron_zones::deploy_zones(&opctx, &sleds_by_id, &blueprint.omicron_zones)
+        .await?;
+
+    dns::deploy_dns(&opctx, &datastore, nexus_label, &blueprint, &sleds_by_id)
+        .await
+        .map_err(|e| vec![anyhow!("{}", InlineErrorChain::new(&e))])
 }
diff --git a/nexus/blueprint-execution/src/omicron_zones.rs b/nexus/blueprint-execution/src/omicron_zones.rs
index f3e81d283d..1d5c4444b1 100644
--- a/nexus/blueprint-execution/src/omicron_zones.rs
+++ b/nexus/blueprint-execution/src/omicron_zones.rs
@@ -4,50 +4,49 @@
 //! Manges deployment of Omicron zones to Sled Agents
 
+use crate::Sled;
+use anyhow::anyhow;
 use anyhow::Context;
 use futures::stream;
 use futures::StreamExt;
 use nexus_db_queries::context::OpContext;
-use nexus_db_queries::db::lookup::LookupPath;
-use nexus_db_queries::db::DataStore;
 use nexus_types::deployment::OmicronZonesConfig;
 use sled_agent_client::Client as SledAgentClient;
 use slog::info;
 use slog::warn;
-use slog::Logger;
 use std::collections::BTreeMap;
 use uuid::Uuid;
 
 /// Idempotently ensure that the specified Omicron zones are deployed to the
 /// corresponding sleds
 pub(crate) async fn deploy_zones(
-    log: &Logger,
     opctx: &OpContext,
-    datastore: &DataStore,
+    sleds_by_id: &BTreeMap<Uuid, Sled>,
     zones: &BTreeMap<Uuid, OmicronZonesConfig>,
 ) -> Result<(), Vec<anyhow::Error>> {
     let errors: Vec<_> = stream::iter(zones)
         .filter_map(|(sled_id, config)| async move {
-            let client = match sled_client(opctx, datastore, *sled_id).await {
-                Ok(client) => client,
-                Err(err) => {
-                    warn!(log, "{err:#}");
+            let db_sled = match sleds_by_id.get(sled_id) {
+                Some(sled) => sled,
+                None => {
+                    let err = anyhow!("sled not found in db list: {}", sled_id);
+                    warn!(opctx.log, "{err:#}");
                     return Some(err);
                 }
             };
+            let client = sled_client(opctx, &db_sled);
             let result =
                 client.omicron_zones_put(&config).await.with_context(|| {
                     format!("Failed to put {config:#?} to sled {sled_id}")
                 });
-
             match result {
                 Err(error) => {
-                    warn!(log, "{error:#}");
+                    warn!(opctx.log, "{error:#}");
                     Some(error)
                 }
                 Ok(_) => {
                     info!(
-                        log,
+                        opctx.log,
                         "Successfully deployed zones for sled agent";
                         "sled_id" => %sled_id,
                         "generation" => %config.generation,
@@ -71,43 +70,27 @@ pub(crate) async fn deploy_zones(
 // method on the `Nexus` type. We want to have a more constrained type we can
 // pass into background tasks for this type of functionality, but for now we
 // just copy the functionality.
-async fn sled_client( - opctx: &OpContext, - datastore: &DataStore, - sled_id: Uuid, -) -> Result { - let (.., sled) = LookupPath::new(opctx, datastore) - .sled_id(sled_id) - .fetch() - .await - .with_context(|| { - format!( - "Failed to create sled_agent::Client for sled_id: {}", - sled_id - ) - })?; +fn sled_client(opctx: &OpContext, sled: &Sled) -> SledAgentClient { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) .build() .unwrap(); - Ok(SledAgentClient::new_with_client( - &format!("http://{}", sled.address()), + SledAgentClient::new_with_client( + &format!("http://{}", sled.sled_agent_address), client, opctx.log.clone(), - )) + ) } #[cfg(test)] mod test { use super::deploy_zones; + use crate::Sled; use httptest::matchers::{all_of, json_decoded, request}; use httptest::responders::status_code; use httptest::Expectation; - use nexus_db_model::{ - ByteCount, SledBaseboard, SledSystemHardware, SledUpdate, - }; use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::OmicronZonesConfig; @@ -140,6 +123,7 @@ mod test { omicron_zones, zones_in_service: BTreeSet::new(), parent_blueprint_id: None, + internal_dns_version: Generation::new(), time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), @@ -155,14 +139,6 @@ mod test { cptestctx.logctx.log.clone(), datastore.clone(), ); - let log = opctx.log.clone(); - - // Get a success result back when the blueprint has an empty set of - // zones. - let blueprint = Arc::new(create_blueprint(BTreeMap::new())); - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) - .await - .expect("failed to deploy no zones"); // Create some fake sled-agent servers to respond to zone puts and add // sleds to CRDB. @@ -170,67 +146,61 @@ mod test { let mut s2 = httptest::Server::run(); let sled_id1 = Uuid::new_v4(); let sled_id2 = Uuid::new_v4(); - let rack_id = Uuid::new_v4(); - for (i, (sled_id, server)) in - [(sled_id1, &s1), (sled_id2, &s2)].iter().enumerate() - { - let SocketAddr::V6(addr) = server.addr() else { - panic!("Expected Ipv6 address. Got {}", server.addr()); - }; - let update = SledUpdate::new( - *sled_id, - addr, - SledBaseboard { - serial_number: i.to_string(), - part_number: "test".into(), - revision: 1, - }, - SledSystemHardware { - is_scrimlet: false, - usable_hardware_threads: 4, - usable_physical_ram: ByteCount(1000.into()), - reservoir_size: ByteCount(999.into()), - }, - rack_id, - ); - datastore - .sled_upsert(update) - .await - .expect("Failed to insert sled to db"); - } - - // The particular dataset doesn't matter for this test. - // We re-use the same one to not obfuscate things - let dataset = OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()).parse().unwrap(), - }; + let sleds_by_id: BTreeMap = + [(sled_id1, &s1), (sled_id2, &s2)] + .into_iter() + .map(|(sled_id, server)| { + let SocketAddr::V6(addr) = server.addr() else { + panic!("Expected Ipv6 address. Got {}", server.addr()); + }; + let sled = Sled { + id: sled_id, + sled_agent_address: addr, + is_scrimlet: false, + }; + (sled_id, sled) + }) + .collect(); - let generation = Generation::new(); + // Get a success result back when the blueprint has an empty set of + // zones. 
+ let blueprint = Arc::new(create_blueprint(BTreeMap::new())); + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) + .await + .expect("failed to deploy no zones"); // Zones are updated in a particular order, but each request contains // the full set of zones that must be running. // See `rack_setup::service::ServiceInner::run` for more details. - let mut zones = OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalDns { - dataset, - dns_address: "oh-hello-internal-dns".into(), - gz_address: "::1".parse().unwrap(), - gz_address_index: 0, - http_address: "some-ipv6-address".into(), - }, - }], - }; + fn make_zones() -> OmicronZonesConfig { + OmicronZonesConfig { + generation: Generation::new(), + zones: vec![OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalDns { + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .unwrap(), + }, + dns_address: "oh-hello-internal-dns".into(), + gz_address: "::1".parse().unwrap(), + gz_address_index: 0, + http_address: "some-ipv6-address".into(), + }, + }], + } + } // Create a blueprint with only the `InternalDns` zone for both servers // We reuse the same `OmicronZonesConfig` because the details don't // matter for this test. + let mut zones1 = make_zones(); + let mut zones2 = make_zones(); let blueprint = Arc::new(create_blueprint(BTreeMap::from([ - (sled_id1, zones.clone()), - (sled_id2, zones.clone()), + (sled_id1, zones1.clone()), + (sled_id2, zones2.clone()), ]))); // Set expectations for the initial requests sent to the fake @@ -250,7 +220,7 @@ mod test { } // Execute it. 
- deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect("failed to deploy initial zones"); @@ -267,7 +237,7 @@ mod test { .respond_with(status_code(204)), ); } - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect("failed to deploy same zones"); s1.verify_and_clear(); @@ -291,7 +261,7 @@ mod test { ); let errors = - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect_err("unexpectedly succeeded in deploying zones"); @@ -304,21 +274,25 @@ mod test { s2.verify_and_clear(); // Add an `InternalNtp` zone for our next update - zones.generation = generation.next(); - zones.zones.push(OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalNtp { - address: "::1".into(), - dns_servers: vec!["::1".parse().unwrap()], - domain: None, - ntp_servers: vec!["some-ntp-server-addr".into()], - }, - }); + fn append_zone(zones: &mut OmicronZonesConfig) { + zones.generation = zones.generation.next(); + zones.zones.push(OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalNtp { + address: "::1".into(), + dns_servers: vec!["::1".parse().unwrap()], + domain: None, + ntp_servers: vec!["some-ntp-server-addr".into()], + }, + }); + } + append_zone(&mut zones1); + append_zone(&mut zones2); let blueprint = Arc::new(create_blueprint(BTreeMap::from([ - (sled_id1, zones.clone()), - (sled_id2, zones.clone()), + (sled_id1, zones1), + (sled_id2, zones2), ]))); // Set our new expectations @@ -337,7 +311,7 @@ mod test { } // Activate the task - deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + deploy_zones(&opctx, &sleds_by_id, &blueprint.1.omicron_zones) .await .expect("failed to deploy last round of zones"); s1.verify_and_clear(); diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index 34fe08d78c..a1f285fbef 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -23,6 +23,7 @@ use uuid::Uuid; pub struct Blueprint { pub id: Uuid, pub parent_blueprint_id: Option, + pub internal_dns_version: Generation, pub time_created: DateTime, pub creator: String, pub comment: String, @@ -33,6 +34,7 @@ impl From<&'_ nexus_types::deployment::Blueprint> for Blueprint { Self { id: bp.id, parent_blueprint_id: bp.parent_blueprint_id, + internal_dns_version: Generation(bp.internal_dns_version), time_created: bp.time_created, creator: bp.creator.clone(), comment: bp.comment.clone(), @@ -45,6 +47,7 @@ impl From for nexus_types::deployment::BlueprintMetadata { Self { id: value.id, parent_blueprint_id: value.parent_blueprint_id, + internal_dns_version: *value.internal_dns_version, time_created: value.time_created, creator: value.creator, comment: value.comment, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 736442282c..a2b5a539c5 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1403,6 +1403,7 @@ table! 
{ id -> Uuid, parent_blueprint_id -> Nullable, + internal_dns_version -> Int8, time_created -> Timestamptz, creator -> Text, diff --git a/nexus/db-queries/src/context.rs b/nexus/db-queries/src/context.rs index 835256fc58..72da01a5f1 100644 --- a/nexus/db-queries/src/context.rs +++ b/nexus/db-queries/src/context.rs @@ -257,6 +257,65 @@ impl OpContext { ); result } + + /// Returns an error if we're currently in a context where expensive or + /// complex operations should not be allowed + /// + /// This is intended for checking that we're not trying to perform expensive + /// or complex (multi-step) operations from HTTP request handlers. + /// Generally, expensive or complex operations should be broken up into + /// multiple requests (e.g., pagination). That's for a variety reasons: + /// + /// - DoS mitigation: requests that kick off an arbitrarily-large amount of + /// work can tie up server resources without requiring commensurate + /// resources from the client, which makes it very easy to attack the + /// system + /// + /// - monitoring: it's easier to reason about metrics for operations that + /// are roughly bounded in size (otherwise, things like "requests per + /// second" can become meaningless) + /// + /// - stability: very large database queries have outsize effects on the + /// rest of the system (potentially blocking other clients for extended + /// periods of time, or preventing the database from timely cleanup of + /// invalidated data, etc.) + /// + /// - throttling: separate requests gives us an opportunity to dynamically + /// throttle clients that are hitting the system hard + /// + /// - failure transparency: when one request kicks off a complex + /// (multi-step) operation, it's harder to communicate programmatically + /// why the request failed + /// + /// - retries: when failures happen during smaller operations, clients can + /// retry only the part that failed. When failures happen during complex + /// (multi-step) operations, the client has to retry the whole thing. + /// This is much worse than it sounds: it means that for the request to + /// succeed, _all_ of the suboperations have to succeed. During a period + /// of transient failures, that could be extremely unlikely. With smaller + /// requests, clients can just retry each one until it succeeds without + /// having to retry the requests that already succeeded (whose failures + /// would trigger another attempt at the whole thing). + /// + /// - Simple request-response HTTP is not well-suited to long-running + /// operations. There's no way to communicate progress or even that the + /// request is still going. There's no good way to cancel such a request, + /// either. Clients and proxies often don't expect long requests and + /// apply aggressive timeouts. Depending on the HTTP version, a + /// long-running request can tie up the TCP connection. 
+ pub fn check_complex_operations_allowed(&self) -> Result<(), Error> { + let api_handler = match self.kind { + OpKind::ExternalApiRequest | OpKind::InternalApiRequest => true, + OpKind::Saga | OpKind::Background | OpKind::Test => false, + }; + if api_handler { + Err(Error::internal_error( + "operation not allowed from API handlers", + )) + } else { + Ok(()) + } + } } impl Session for ConsoleSessionWithSiloId { diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 72adb1d3df..e4625222bf 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -233,7 +233,13 @@ impl DataStore { // Read the metadata from the primary blueprint row, and ensure that it // exists. - let (parent_blueprint_id, time_created, creator, comment) = { + let ( + parent_blueprint_id, + internal_dns_version, + time_created, + creator, + comment, + ) = { use db::schema::blueprint::dsl; let Some(blueprint) = dsl::blueprint @@ -251,6 +257,7 @@ impl DataStore { ( blueprint.parent_blueprint_id, + *blueprint.internal_dns_version, blueprint.time_created, blueprint.creator, blueprint.comment, @@ -479,6 +486,7 @@ impl DataStore { omicron_zones, zones_in_service, parent_blueprint_id, + internal_dns_version, time_created, creator, comment, @@ -1055,6 +1063,7 @@ mod tests { use nexus_types::deployment::SledResources; use nexus_types::inventory::Collection; use omicron_common::address::Ipv6Subnet; + use omicron_common::api::external::Generation; use omicron_test_utils::dev; use rand::thread_rng; use rand::Rng; @@ -1160,6 +1169,7 @@ mod tests { let policy = policy_from_collection(&collection); let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "test", ) @@ -1193,6 +1203,7 @@ mod tests { nexus_inventory::CollectionBuilder::new("test").build(); let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &EMPTY_POLICY, "test", ) @@ -1319,8 +1330,12 @@ mod tests { let new_sled_zpools = &policy.sleds.get(&new_sled_id).unwrap().zpools; // Create a builder for a child blueprint. - let mut builder = - BlueprintBuilder::new_based_on(&blueprint1, &policy, "test"); + let mut builder = BlueprintBuilder::new_based_on( + &blueprint1, + Generation::new(), + &policy, + "test", + ); // Add zones to our new sled. 
assert_eq!( @@ -1366,7 +1381,7 @@ mod tests { .blueprint_read(&opctx, &authz_blueprint2) .await .expect("failed to read collection back"); - println!("diff: {}", blueprint2.diff(&blueprint_read)); + println!("diff: {}", blueprint2.diff_sleds(&blueprint_read)); assert_eq!(blueprint2, blueprint_read); { let mut expected_ids = [blueprint1.id, blueprint2.id]; @@ -1459,16 +1474,25 @@ mod tests { nexus_inventory::CollectionBuilder::new("test").build(); let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &EMPTY_POLICY, "test1", ) .unwrap(); - let blueprint2 = - BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test2") - .build(); - let blueprint3 = - BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test3") - .build(); + let blueprint2 = BlueprintBuilder::new_based_on( + &blueprint1, + Generation::new(), + &EMPTY_POLICY, + "test2", + ) + .build(); + let blueprint3 = BlueprintBuilder::new_based_on( + &blueprint1, + Generation::new(), + &EMPTY_POLICY, + "test3", + ) + .build(); assert_eq!(blueprint1.parent_blueprint_id, None); assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); assert_eq!(blueprint3.parent_blueprint_id, Some(blueprint1.id)); @@ -1557,9 +1581,13 @@ mod tests { // Create a child of blueprint3, and ensure when we set it as the target // with enabled=false, that status is serialized. - let blueprint4 = - BlueprintBuilder::new_based_on(&blueprint3, &EMPTY_POLICY, "test3") - .build(); + let blueprint4 = BlueprintBuilder::new_based_on( + &blueprint3, + Generation::new(), + &EMPTY_POLICY, + "test3", + ) + .build(); assert_eq!(blueprint4.parent_blueprint_id, Some(blueprint3.id)); datastore.blueprint_insert(&opctx, &blueprint4).await.unwrap(); let bp4_target = BlueprintTarget { diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index 552ad31487..2d59fcf7e5 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -346,8 +346,77 @@ impl DataStore { Ok(()) } + /// Update the configuration of a DNS zone as specified in `update`, + /// conditional on the _current_ DNS version being `old_version`. + /// + /// Unlike `dns_update_incremental()`, this function assumes the caller has + /// already constructed `update` based on a specific DNS version + /// (`old_version`) and only wants to apply these changes if the DNS version + /// in the database has not changed. + /// + /// Also unlike `dns_update_incremental()`, this function creates its own + /// transaction to apply the update. + /// + /// Like `dns_update_incremental()`, **callers almost certainly want to wake + /// up the corresponding Nexus background task to cause these changes to be + /// propagated to the corresponding DNS servers.** + pub async fn dns_update_from_version( + &self, + opctx: &OpContext, + update: DnsVersionUpdateBuilder, + old_version: Generation, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::DNS_CONFIG).await?; + let conn = self.pool_connection_authorized(opctx).await?; + conn.transaction_async(|c| async move { + let zones = self + .dns_zones_list_all_on_connection(opctx, &c, update.dns_group) + .await?; + // This looks like a time-of-check-to-time-of-use race, but this + // approach works because we're inside a transaction and the + // isolation level is SERIALIZABLE. 
+ let version = self + .dns_group_latest_version_conn(opctx, &c, update.dns_group) + .await?; + if version.version != old_version { + return Err(TransactionError::CustomError(Error::conflict( + format!( + "expected current DNS version to be {}, found {}", + *old_version, *version.version, + ), + ))); + } + + self.dns_write_version_internal( + &c, + update, + zones, + Generation(old_version.next()), + ) + .await + }) + .await + .map_err(|e| match e { + TransactionError::CustomError(e) => e, + TransactionError::Database(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) + } + /// Update the configuration of a DNS zone as specified in `update` /// + /// Unlike `dns_update_from_version()`, this function assumes that the + /// caller's changes are valid regardless of the current DNS configuration. + /// This function fetches the latest version and always writes the updated + /// config as the next version. This is appropriate if the caller is adding + /// something wholly new or removing something that it knows should be + /// present (as in the case when we add or remove DNS names for a Silo, + /// since we control exactly when that happens). This is _not_ appropriate + /// if the caller is making arbitrary changes that might conflict with a + /// concurrent caller. For that, you probably want + /// `dns_update_from_version()`. + /// /// This function runs the body inside a transaction (if no transaction is /// open) or a nested transaction (savepoint, if a transaction is already /// open). Generally, the caller should invoke this function while already @@ -358,14 +427,14 @@ impl DataStore { /// /// It's recommended to put this step last in any transaction because the /// more time elapses between running this function and attempting to commit - /// the transaction, the greater the change of either transaction failure + /// the transaction, the greater the chance of either transaction failure /// due to a conflict error (if some other caller attempts to update the /// same DNS group) or another client blocking (for the same reason). /// /// **Callers almost certainly want to wake up the corresponding Nexus /// background task to cause these changes to be propagated to the /// corresponding DNS servers.** - pub async fn dns_update( + pub async fn dns_update_incremental( &self, opctx: &OpContext, conn: &async_bb8_diesel::Connection, @@ -378,19 +447,32 @@ impl DataStore { .await?; conn.transaction_async(|c| async move { - self.dns_update_internal(opctx, &c, update, zones).await + let version = self + .dns_group_latest_version_conn(opctx, conn, update.dns_group) + .await?; + self.dns_write_version_internal( + &c, + update, + zones, + Generation(version.version.next()), + ) + .await }) .await } // This must only be used inside a transaction. Otherwise, it may make - // invalid changes to the database state. Use `dns_update()` instead. - async fn dns_update_internal( + // invalid changes to the database state. Use one of the `dns_update_*()` + // functions instead. + // + // The caller should already have checked (in the same transaction) that + // their version number is the correct next version. + async fn dns_write_version_internal( &self, - opctx: &OpContext, conn: &async_bb8_diesel::Connection, update: DnsVersionUpdateBuilder, zones: Vec, + new_version_num: Generation, ) -> Result<(), TransactionError> { // TODO-scalability TODO-performance This would be much better as a CTE // for all the usual reasons described in RFD 192. 
Using an interactive @@ -401,11 +483,6 @@ impl DataStore { // operations fail spuriously as far as the client is concerned). We // expect these problems to be small or unlikely at small scale but // significant as the system scales up. - let dns_group = update.dns_group; - let version = - self.dns_group_latest_version_conn(opctx, conn, dns_group).await?; - let new_version_num = - nexus_db_model::Generation(version.version.next()); let new_version = DnsVersion { dns_group: update.dns_group, version: new_version_num, @@ -498,10 +575,11 @@ impl DataStore { /// and add the name back with different records. /// /// You use this object to build up a _description_ of the changes to the DNS -/// zone's configuration. Then you call [`DataStore::dns_update()`] to apply -/// these changes transactionally to the database. The changes are then -/// propagated asynchronously to the DNS servers. No changes are made (to -/// either the database or the DNS servers) while you modify this object. +/// zone's configuration. Then you call [`DataStore::dns_update_incremental()`] +/// or [`DataStore::dns_update_from_version()`] to apply these changes +/// transactionally to the database. The changes are then propagated +/// asynchronously to the DNS servers. No changes are made (to either the +/// database or the DNS servers) while you modify this object. /// /// This object changes all of the zones associated with a particular DNS group /// because the assumption right now is that they're equivalent. (In practice, @@ -1457,7 +1535,10 @@ mod test { update.add_name(String::from("n2"), records2.clone()).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } // Verify the new config. @@ -1490,7 +1571,10 @@ mod test { update.add_name(String::from("n1"), records12.clone()).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } let dns_config = datastore @@ -1520,7 +1604,10 @@ mod test { update.remove_name(String::from("n1")).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } let dns_config = datastore @@ -1547,7 +1634,10 @@ mod test { update.add_name(String::from("n1"), records2.clone()).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - datastore.dns_update(&opctx, &conn, update).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap(); } let dns_config = datastore @@ -1586,7 +1676,9 @@ mod test { let copctx = opctx.child(std::collections::BTreeMap::new()); let mut fut = conn1 .transaction_async(|c1| async move { - cds.dns_update(&copctx, &c1, update1).await.unwrap(); + cds.dns_update_incremental(&copctx, &c1, update1) + .await + .unwrap(); // Let the outside scope know we've done the update, but we // haven't committed the transaction yet. Wait for them to // tell us to proceed. 
@@ -1613,7 +1705,10 @@ mod test { String::from("the test suite"), ); update2.add_name(String::from("n1"), records1.clone()).unwrap(); - datastore.dns_update(&opctx, &conn2, update2).await.unwrap(); + datastore + .dns_update_incremental(&opctx, &conn2, update2) + .await + .unwrap(); // Now let the first one finish. wait2_tx.send(()).unwrap(); @@ -1657,8 +1752,10 @@ mod test { update.remove_name(String::from("n4")).unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); - let error = - datastore.dns_update(&opctx, &conn, update).await.unwrap_err(); + let error = datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap_err(); let error = match error { TransactionError::CustomError(err) => err, _ => panic!("Unexpected error: {:?}", error), @@ -1687,7 +1784,10 @@ mod test { let conn = datastore.pool_connection_for_tests().await.unwrap(); let error = Error::from( - datastore.dns_update(&opctx, &conn, update).await.unwrap_err(), + datastore + .dns_update_incremental(&opctx, &conn, update) + .await + .unwrap_err(), ); let msg = error.to_string(); assert!(msg.starts_with("Internal Error: "), "Message: {msg:}"); diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index b9ad2ea610..4dfee7f2a5 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -105,6 +105,7 @@ pub use instance::InstanceAndActiveVmm; pub use inventory::DataStoreInventoryTest; pub use rack::RackInit; pub use silo::Discoverability; +use std::num::NonZeroU32; pub use switch_port::SwitchPortSettingsCombinedResult; pub use virtual_provisioning_collection::StorageType; pub use volume::read_only_resources_associated_with_volume; @@ -121,6 +122,14 @@ pub const SERVICE_IP_POOL_NAME: &str = "oxide-service-pool"; /// The name of the built-in Project and VPC for Oxide services. pub const SERVICES_DB_NAME: &str = "oxide-services"; +/// "limit" to be used in SQL queries that paginate through large result sets +/// +/// This value is chosen to be small enough to avoid any queries being too +/// expensive. +// unsafe: `new_unchecked` is only unsound if the argument is 0. +pub const SQL_BATCH_SIZE: NonZeroU32 = + unsafe { NonZeroU32::new_unchecked(1000) }; + // Represents a query that is ready to be executed. // // This helper trait lets the statement either be executed or explained. diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index a88a27872f..3477caac6f 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -282,7 +282,8 @@ impl DataStore { .await?; } - self.dns_update(nexus_opctx, &conn, dns_update).await?; + self.dns_update_incremental(nexus_opctx, &conn, dns_update) + .await?; self.silo_quotas_create( &conn, @@ -420,7 +421,7 @@ impl DataStore { ) .await?; - self.dns_update(dns_opctx, &conn, dns_update).await?; + self.dns_update_incremental(dns_opctx, &conn, dns_update).await?; info!(opctx.log, "deleted silo {}", id); diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 7b94d64418..c1da03b794 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`Sled`]s. 
use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; @@ -14,11 +15,13 @@ use crate::db::model::Sled; use crate::db::model::SledResource; use crate::db::model::SledUpdate; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::update_and_check::UpdateAndCheck; use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_types::identity::Asset; use omicron_common::api::external; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -77,6 +80,29 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// List all sleds, making as many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. + pub async fn sled_list_all_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.check_complex_operations_allowed()?; + + let mut all_sleds = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self.sled_list(opctx, &p.current_pagparams()).await?; + paginator = + p.found_batch(&batch, &|s: &nexus_db_model::Sled| s.id()); + all_sleds.extend(batch); + } + Ok(all_sleds) + } + pub async fn sled_reservation_create( &self, opctx: &OpContext, diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index ac2fe70e6b..9d6df2218f 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -70,6 +70,7 @@ pub enum Ensure { pub struct BlueprintBuilder<'a> { /// previous blueprint, on which this one will be based parent_blueprint: &'a Blueprint, + internal_dns_version: Generation, // These fields are used to allocate resources from sleds. policy: &'a Policy, @@ -88,6 +89,7 @@ impl<'a> BlueprintBuilder<'a> { /// collection (representing no changes from the collection state) pub fn build_initial_from_collection( collection: &'a Collection, + internal_dns_version: Generation, policy: &'a Policy, creator: &str, ) -> Result { @@ -134,6 +136,7 @@ impl<'a> BlueprintBuilder<'a> { omicron_zones, zones_in_service, parent_blueprint_id: None, + internal_dns_version, time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("from collection {}", collection.id), @@ -144,11 +147,13 @@ impl<'a> BlueprintBuilder<'a> { /// starting with no changes from that state pub fn new_based_on( parent_blueprint: &'a Blueprint, + internal_dns_version: Generation, policy: &'a Policy, creator: &str, ) -> BlueprintBuilder<'a> { BlueprintBuilder { parent_blueprint, + internal_dns_version, policy, sled_ip_allocators: BTreeMap::new(), omicron_zones: BTreeMap::new(), @@ -199,6 +204,7 @@ impl<'a> BlueprintBuilder<'a> { omicron_zones, zones_in_service: self.zones_in_service, parent_blueprint_id: Some(self.parent_blueprint.id), + internal_dns_version: self.internal_dns_version, time_created: now_db_precision(), creator: self.creator, comment: self.comments.join(", "), @@ -552,6 +558,7 @@ pub mod test { let blueprint_initial = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "the_test", ) @@ -561,7 +568,7 @@ pub mod test { // provide that ourselves. 
For our purposes though we don't care. let zones_in_service = blueprint_initial.zones_in_service.clone(); let diff = blueprint_initial - .diff_from_collection(&collection, &zones_in_service); + .diff_sleds_from_collection(&collection, &zones_in_service); println!( "collection -> initial blueprint (expected no changes):\n{}", diff @@ -573,11 +580,12 @@ pub mod test { // Test a no-op blueprint. let builder = BlueprintBuilder::new_based_on( &blueprint_initial, + Generation::new(), &policy, "test_basic", ); let blueprint = builder.build(); - let diff = blueprint_initial.diff(&blueprint); + let diff = blueprint_initial.diff_sleds(&blueprint); println!( "initial blueprint -> next blueprint (expected no changes):\n{}", diff @@ -592,13 +600,18 @@ pub mod test { let (collection, mut policy) = example(); let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + Generation::new(), &policy, "the_test", ) .expect("failed to create initial blueprint"); - let mut builder = - BlueprintBuilder::new_based_on(&blueprint1, &policy, "test_basic"); + let mut builder = BlueprintBuilder::new_based_on( + &blueprint1, + Generation::new(), + &policy, + "test_basic", + ); // The initial blueprint should have internal NTP zones on all the // existing sleds, plus Crucible zones on all pools. So if we ensure @@ -613,7 +626,7 @@ pub mod test { } let blueprint2 = builder.build(); - let diff = blueprint1.diff(&blueprint2); + let diff = blueprint1.diff_sleds(&blueprint2); println!( "initial blueprint -> next blueprint (expected no changes):\n{}", diff @@ -625,8 +638,12 @@ pub mod test { // The next step is adding these zones to a new sled. let new_sled_id = Uuid::new_v4(); let _ = policy_add_sled(&mut policy, new_sled_id); - let mut builder = - BlueprintBuilder::new_based_on(&blueprint2, &policy, "test_basic"); + let mut builder = BlueprintBuilder::new_based_on( + &blueprint2, + Generation::new(), + &policy, + "test_basic", + ); builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); let new_sled_resources = policy.sleds.get(&new_sled_id).unwrap(); for pool_name in &new_sled_resources.zpools { @@ -636,7 +653,7 @@ pub mod test { } let blueprint3 = builder.build(); - let diff = blueprint2.diff(&blueprint3); + let diff = blueprint2.diff_sleds(&blueprint3); println!("expecting new NTP and Crucible zones:\n{}", diff); // No sleds were changed or removed. diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index 0a8e1f0b81..8ea6c0ba19 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -12,6 +12,7 @@ use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; use nexus_types::deployment::Policy; use nexus_types::inventory::Collection; +use omicron_common::api::external::Generation; use slog::{info, Logger}; pub struct Planner<'a> { @@ -34,14 +35,19 @@ impl<'a> Planner<'a> { pub fn new_based_on( log: Logger, parent_blueprint: &'a Blueprint, + internal_dns_version: Generation, policy: &'a Policy, creator: &str, // NOTE: Right now, we just assume that this is the latest inventory // collection. See the comment on the corresponding field in `Planner`. 
inventory: &'a Collection, ) -> Planner<'a> { - let blueprint = - BlueprintBuilder::new_based_on(parent_blueprint, policy, creator); + let blueprint = BlueprintBuilder::new_based_on( + parent_blueprint, + internal_dns_version, + policy, + creator, + ); Planner { log, policy, blueprint, inventory } } @@ -166,6 +172,9 @@ mod test { fn test_basic_add_sled() { let logctx = test_setup_log("planner_basic_add_sled"); + // For our purposes, we don't care about the internal DNS generation. + let internal_dns_version = Generation::new(); + // Use our example inventory collection. let (mut collection, mut policy) = example(); @@ -173,6 +182,7 @@ mod test { // because there's a separate test for that. let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, + internal_dns_version, &policy, "the_test", ) @@ -184,6 +194,7 @@ mod test { let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, + internal_dns_version, &policy, "no-op?", &collection, @@ -191,7 +202,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff(&blueprint2); + let diff = blueprint1.diff_sleds(&blueprint2); println!("1 -> 2 (expected no changes):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -206,6 +217,7 @@ mod test { let blueprint3 = Planner::new_based_on( logctx.log.clone(), &blueprint2, + internal_dns_version, &policy, "test: add NTP?", &collection, @@ -213,7 +225,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint2.diff(&blueprint3); + let diff = blueprint2.diff_sleds(&blueprint3); println!("2 -> 3 (expect new NTP zone on new sled):\n{}", diff); let sleds = diff.sleds_added().collect::>(); let (sled_id, sled_zones) = sleds[0]; @@ -236,13 +248,14 @@ mod test { let blueprint4 = Planner::new_based_on( logctx.log.clone(), &blueprint3, + internal_dns_version, &policy, "test: add nothing more", &collection, ) .plan() .expect("failed to plan"); - let diff = blueprint3.diff(&blueprint4); + let diff = blueprint3.diff_sleds(&blueprint4); println!("3 -> 4 (expected no changes):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -270,6 +283,7 @@ mod test { let blueprint5 = Planner::new_based_on( logctx.log.clone(), &blueprint3, + internal_dns_version, &policy, "test: add Crucible zones?", &collection, @@ -277,7 +291,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint3.diff(&blueprint5); + let diff = blueprint3.diff_sleds(&blueprint5); println!("3 -> 5 (expect Crucible zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); @@ -303,6 +317,7 @@ mod test { let blueprint6 = Planner::new_based_on( logctx.log.clone(), &blueprint5, + internal_dns_version, &policy, "test: no-op?", &collection, @@ -310,7 +325,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint5.diff(&blueprint6); + let diff = blueprint5.diff_sleds(&blueprint6); println!("5 -> 6 (expect no changes):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 84d4cef212..d340f1d38b 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -19,6 +19,7 @@ use tokio::sync::watch; pub struct BlueprintExecutor { datastore: Arc, rx_blueprint: watch::Receiver>>, + nexus_label: String, } 
impl BlueprintExecutor { @@ -27,8 +28,9 @@ impl BlueprintExecutor { rx_blueprint: watch::Receiver< Option>, >, + nexus_label: String, ) -> BlueprintExecutor { - BlueprintExecutor { datastore, rx_blueprint } + BlueprintExecutor { datastore, rx_blueprint, nexus_label } } } @@ -65,6 +67,7 @@ impl BackgroundTask for BlueprintExecutor { opctx, &self.datastore, blueprint, + &self.nexus_label, ) .await; @@ -132,6 +135,7 @@ mod test { omicron_zones, zones_in_service: BTreeSet::new(), parent_blueprint_id: None, + internal_dns_version: Generation::new(), time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), @@ -185,7 +189,11 @@ mod test { } let (blueprint_tx, blueprint_rx) = watch::channel(None); - let mut task = BlueprintExecutor::new(datastore.clone(), blueprint_rx); + let mut task = BlueprintExecutor::new( + datastore.clone(), + blueprint_rx, + String::from("test-suite"), + ); // Now we're ready. // @@ -204,31 +212,30 @@ mod test { // Create a non-empty blueprint describing two servers and verify that // the task correctly winds up making requests to both of them and - // reporting success. We reuse the same `OmicronZonesConfig` in - // constructing the blueprint because the details don't matter for this - // test. - let zones = OmicronZonesConfig { - generation: Generation::new(), - zones: vec![OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()) - .parse() - .unwrap(), + // reporting success. + fn make_zones() -> OmicronZonesConfig { + OmicronZonesConfig { + generation: Generation::new(), + zones: vec![OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalDns { + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .unwrap(), + }, + dns_address: "oh-hello-internal-dns".into(), + gz_address: "::1".parse().unwrap(), + gz_address_index: 0, + http_address: "some-ipv6-address".into(), }, - dns_address: "oh-hello-internal-dns".into(), - gz_address: "::1".parse().unwrap(), - gz_address_index: 0, - http_address: "some-ipv6-address".into(), - }, - }], - }; - + }], + } + } let mut blueprint = create_blueprint(BTreeMap::from([ - (sled_id1, zones.clone()), - (sled_id2, zones.clone()), + (sled_id1, make_zones()), + (sled_id2, make_zones()), ])); blueprint_tx.send(Some(Arc::new(blueprint.clone()))).unwrap(); diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/blueprint_load.rs index c34d2ab103..8886df81cd 100644 --- a/nexus/src/app/background/blueprint_load.rs +++ b/nexus/src/app/background/blueprint_load.rs @@ -184,6 +184,7 @@ mod test { use nexus_inventory::now_db_precision; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{Blueprint, BlueprintTarget}; + use omicron_common::api::external::Generation; use serde::Deserialize; use std::collections::{BTreeMap, BTreeSet}; use uuid::Uuid; @@ -206,6 +207,7 @@ mod test { omicron_zones: BTreeMap::new(), zones_in_service: BTreeSet::new(), parent_blueprint_id, + internal_dns_version: Generation::new(), time_created: now_db_precision(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 95fe5c933e..e588ac88fe 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -217,6 +217,7 
@@ impl BackgroundTasks { let blueprint_executor = blueprint_execution::BlueprintExecutor::new( datastore.clone(), rx_blueprint.clone(), + nexus_id.to_string(), ); let task_blueprint_executor = driver.register( String::from("blueprint_executor"), diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 65f8f4d028..624ae8d93e 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -4,8 +4,10 @@ //! Configuration of the deployment system +use nexus_db_model::DnsGroup; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::SQL_BATCH_SIZE; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; use nexus_deployment::planner::Planner; @@ -24,6 +26,7 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; +use omicron_common::api::external::Generation; use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; @@ -31,19 +34,15 @@ use omicron_common::api::external::LookupType; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::num::NonZeroU32; use std::str::FromStr; use uuid::Uuid; -/// "limit" used in SQL queries that paginate through all sleds, zpools, etc. -// unsafe: `new_unchecked` is only unsound if the argument is 0. -const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; - /// Common structure for collecting information that the planner needs struct PlanningContext { policy: Policy, creator: String, inventory: Option, + internal_dns_version: Generation, } impl super::Nexus { @@ -116,18 +115,7 @@ impl super::Nexus { let creator = self.id.to_string(); let datastore = self.datastore(); - let sled_rows = { - let mut all_sleds = Vec::new(); - let mut paginator = Paginator::new(SQL_BATCH_SIZE); - while let Some(p) = paginator.next() { - let batch = - datastore.sled_list(opctx, &p.current_pagparams()).await?; - paginator = - p.found_batch(&batch, &|s: &nexus_db_model::Sled| s.id()); - all_sleds.extend(batch); - } - all_sleds - }; + let sled_rows = datastore.sled_list_all_batched(opctx).await?; let mut zpools_by_sled_id = { let mut zpools = BTreeMap::new(); @@ -192,7 +180,22 @@ impl super::Nexus { "fetching latest inventory collection for blueprint planner", )?; - Ok(PlanningContext { creator, policy: Policy { sleds }, inventory }) + // Fetch the current internal DNS version. This could be made part of + // inventory, but it's enough of a one-off that there's no particular + // advantage to doing that work now. 
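+        // (The lookup below returns the latest stored version for the group;
+        // dereferencing its `version` field yields the external `Generation`
+        // that is handed to the planner as `internal_dns_version`.)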
+ let dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::Internal) + .await + .internal_context( + "fetching internal DNS version for blueprint planning", + )?; + + Ok(PlanningContext { + creator, + policy: Policy { sleds }, + inventory, + internal_dns_version: *dns_version.version, + }) } async fn blueprint_add( @@ -215,6 +218,7 @@ impl super::Nexus { let planning_context = self.blueprint_planning_context(opctx).await?; let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, + planning_context.internal_dns_version, &planning_context.policy, &planning_context.creator, ) @@ -249,6 +253,7 @@ impl super::Nexus { let planner = Planner::new_based_on( opctx.log.clone(), &parent_blueprint, + planning_context.internal_dns_version, &planning_context.policy, &planning_context.creator, &inventory, diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index da21602cb1..b0fb7691e6 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -780,25 +780,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { ); } - pub async fn scrimlet_dns_setup(&mut self) { - let sled_agent = self - .sled_agent - .as_ref() - .expect("Cannot set up scrimlet DNS without sled agent"); - - let sa = match sled_agent.http_server.local_addr() { - SocketAddr::V6(sa) => sa, - SocketAddr::V4(_) => panic!("expected SocketAddrV6 for sled agent"), - }; - - for loc in [SwitchLocation::Switch0, SwitchLocation::Switch1] { - self.rack_init_builder - .internal_dns_config - .host_scrimlet(loc, sa) - .expect("add switch0 scrimlet dns entry"); - } - } - // Set up an external DNS server. pub async fn start_external_dns(&mut self) { let log = self.logctx.log.new(o!("component" => "external_dns_server")); @@ -1061,10 +1042,6 @@ async fn setup_with_config_impl( "start_crucible_pantry", Box::new(|builder| builder.start_crucible_pantry().boxed()), ), - ( - "scrimlet_dns_setup", - Box::new(|builder| builder.scrimlet_dns_setup().boxed()), - ), ( "populate_internal_dns", Box::new(|builder| builder.populate_internal_dns().boxed()), diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 3b4c3b3142..4c6e5e25e9 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -119,6 +119,10 @@ pub struct Blueprint { /// which blueprint this blueprint is based on pub parent_blueprint_id: Option, + /// internal DNS version when this blueprint was created + // See blueprint generation for more on this. + pub internal_dns_version: Generation, + /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, /// identity of the component that generated the blueprint (for debugging) @@ -145,8 +149,11 @@ impl Blueprint { self.omicron_zones.keys().copied() } - /// Summarize the difference between two blueprints - pub fn diff<'a>(&'a self, other: &'a Blueprint) -> OmicronZonesDiff<'a> { + /// Summarize the difference between sleds and zones between two blueprints + pub fn diff_sleds<'a>( + &'a self, + other: &'a Blueprint, + ) -> OmicronZonesDiff<'a> { OmicronZonesDiff { before_label: format!("blueprint {}", self.id), before_zones: self.omicron_zones.clone(), @@ -157,14 +164,15 @@ impl Blueprint { } } - /// Summarize the difference between a collection and a blueprint + /// Summarize the differences in sleds and zones between a collection and a + /// blueprint /// /// This gives an idea about what would change about a running system if one /// were to execute the blueprint. 
/// /// Note that collections do not currently include information about what /// zones are in-service, so the caller must provide that information. - pub fn diff_from_collection<'a>( + pub fn diff_sleds_from_collection<'a>( &'a self, collection: &'a Collection, before_zones_in_service: &'a BTreeSet, @@ -196,6 +204,8 @@ pub struct BlueprintMetadata { /// which blueprint this blueprint is based on pub parent_blueprint_id: Option, + /// internal DNS version when this blueprint was created + pub internal_dns_version: Generation, /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index c99e51af4f..760d3918f4 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -134,6 +134,14 @@ impl Collection { ) -> impl Iterator { self.omicron_zones.values().flat_map(|z| z.zones.zones.iter()) } + + /// Iterate over the sled ids of sleds identified as Scrimlets + pub fn scrimlets(&self) -> impl Iterator + '_ { + self.sled_agents + .iter() + .filter(|(_, inventory)| inventory.sled_role == SledRole::Scrimlet) + .map(|(sled_id, _)| *sled_id) + } } /// A unique baseboard id found during a collection diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 103eb2e0c7..22e58c9251 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3082,6 +3082,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( -- now gone. parent_blueprint_id UUID, + -- identifies the latest internal DNS version when blueprint planning began + -- XXX-dap when it comes time for the migration, make sure to update + -- existing rows + internal_dns_version INT8, + -- These fields are for debugging only. time_created TIMESTAMPTZ NOT NULL, creator TEXT NOT NULL, diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index bed82a7a01..8fab8a0b8d 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -19,7 +19,6 @@ use omicron_common::address::{ SLED_PREFIX, }; use omicron_common::api::external::{MacAddr, Vni}; -use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::api::internal::shared::{ NetworkInterface, NetworkInterfaceKind, SourceNatConfig, }; @@ -298,36 +297,17 @@ impl Plan { "No scrimlets observed".to_string(), )); } - for (i, sled) in scrimlets.iter().enumerate() { + for sled in scrimlets.iter() { let address = get_switch_zone_address(sled.subnet); - let zone = - dns_builder.host_dendrite(sled.sled_id, address).unwrap(); dns_builder - .service_backend_zone( - ServiceName::Dendrite, - &zone, + .host_zone_switch( + sled.sled_id, + address, DENDRITE_PORT, - ) - .unwrap(); - dns_builder - .service_backend_zone( - ServiceName::ManagementGatewayService, - &zone, MGS_PORT, + MGD_PORT, ) .unwrap(); - dns_builder - .service_backend_zone(ServiceName::Mgd, &zone, MGD_PORT) - .unwrap(); - - // TODO only works for single rack - let sled_address = get_sled_address(sled.subnet); - let switch_location = if i == 0 { - SwitchLocation::Switch0 - } else { - SwitchLocation::Switch1 - }; - dns_builder.host_scrimlet(switch_location, sled_address).unwrap(); } // We'll stripe most services across all available Sleds, round-robin @@ -357,11 +337,11 @@ impl Plan { let dns_address = SocketAddrV6::new(ip, DNS_PORT, 0, 0); let id = Uuid::new_v4(); - let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder - .service_backend_zone( + .host_zone_with_one_backend( + id, + ip, 
ServiceName::InternalDns, - &zone, DNS_HTTP_PORT, ) .unwrap(); @@ -394,9 +374,13 @@ impl Plan { let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::COCKROACH_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); - let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder - .service_backend_zone(ServiceName::Cockroach, &zone, port) + .host_zone_with_one_backend( + id, + ip, + ServiceName::Cockroach, + port, + ) .unwrap(); let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::CockroachDb)?; @@ -430,11 +414,11 @@ impl Plan { let internal_ip = sled.addr_alloc.next().expect("Not enough addrs"); let http_port = omicron_common::address::DNS_HTTP_PORT; let http_address = SocketAddrV6::new(internal_ip, http_port, 0, 0); - let zone = dns_builder.host_zone(id, internal_ip).unwrap(); dns_builder - .service_backend_zone( + .host_zone_with_one_backend( + id, + internal_ip, ServiceName::ExternalDns, - &zone, http_port, ) .unwrap(); @@ -466,11 +450,11 @@ impl Plan { }; let id = Uuid::new_v4(); let address = sled.addr_alloc.next().expect("Not enough addrs"); - let zone = dns_builder.host_zone(id, address).unwrap(); dns_builder - .service_backend_zone( + .host_zone_with_one_backend( + id, + address, ServiceName::Nexus, - &zone, omicron_common::address::NEXUS_INTERNAL_PORT, ) .unwrap(); @@ -509,11 +493,11 @@ impl Plan { }; let id = Uuid::new_v4(); let address = sled.addr_alloc.next().expect("Not enough addrs"); - let zone = dns_builder.host_zone(id, address).unwrap(); dns_builder - .service_backend_zone( + .host_zone_with_one_backend( + id, + address, ServiceName::Oximeter, - &zone, omicron_common::address::OXIMETER_PORT, ) .unwrap(); @@ -543,9 +527,13 @@ impl Plan { let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CLICKHOUSE_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); - let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder - .service_backend_zone(ServiceName::Clickhouse, &zone, port) + .host_zone_with_one_backend( + id, + ip, + ServiceName::Clickhouse, + port, + ) .unwrap(); let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::Clickhouse)?; @@ -575,11 +563,11 @@ impl Plan { let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CLICKHOUSE_KEEPER_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); - let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder - .service_backend_zone( + .host_zone_with_one_backend( + id, + ip, ServiceName::ClickhouseKeeper, - &zone, port, ) .unwrap(); @@ -608,9 +596,13 @@ impl Plan { let address = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CRUCIBLE_PANTRY_PORT; let id = Uuid::new_v4(); - let zone = dns_builder.host_zone(id, address).unwrap(); dns_builder - .service_backend_zone(ServiceName::CruciblePantry, &zone, port) + .host_zone_with_one_backend( + id, + address, + ServiceName::CruciblePantry, + port, + ) .unwrap(); sled.request.zones.push(OmicronZoneConfig { id, @@ -629,11 +621,11 @@ impl Plan { let port = omicron_common::address::CRUCIBLE_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); let id = Uuid::new_v4(); - let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder - .service_backend_zone( + .host_zone_with_one_backend( + id, + ip, ServiceName::Crucible(id), - &zone, port, ) .unwrap(); @@ -656,7 +648,6 @@ impl Plan { for (idx, sled) in sled_info.iter_mut().enumerate() { let id = Uuid::new_v4(); let address = sled.addr_alloc.next().expect("Not 
enough addrs"); - let zone = dns_builder.host_zone(id, address).unwrap(); let ntp_address = SocketAddrV6::new(address, NTP_PORT, 0, 0); let (zone_type, svcname) = if idx < BOUNDARY_NTP_COUNT { @@ -686,7 +677,9 @@ impl Plan { ) }; - dns_builder.service_backend_zone(svcname, &zone, NTP_PORT).unwrap(); + dns_builder + .host_zone_with_one_backend(id, address, svcname, NTP_PORT) + .unwrap(); sled.request.zones.push(OmicronZoneConfig { id,