From 45ccacd2c1c4168cacb3d8bff5a58c77666ac760 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 13 Mar 2024 20:53:10 -0700 Subject: [PATCH] update external DNS during blueprint execution (#5212) --- Cargo.lock | 1 + clients/dns-service-client/src/diff.rs | 9 +- clients/dns-service-client/src/lib.rs | 39 + dev-tools/omdb/src/bin/omdb/nexus.rs | 5 +- dev-tools/reconfigurator-cli/src/main.rs | 2 + .../tests/input/complex.json | 4 +- nexus/db-model/src/deployment.rs | 3 + nexus/db-model/src/schema.rs | 3 +- .../db-queries/src/db/datastore/deployment.rs | 20 +- nexus/db-queries/src/db/datastore/rack.rs | 93 +- nexus/db-queries/src/db/datastore/silo.rs | 31 + nexus/db-queries/src/db/datastore/vpc.rs | 4 + nexus/reconfigurator/execution/Cargo.toml | 3 +- nexus/reconfigurator/execution/src/dns.rs | 1037 +++++++++++++++-- nexus/reconfigurator/execution/src/lib.rs | 25 + .../execution/src/omicron_zones.rs | 1 + .../execution/src/overridables.rs | 121 ++ .../execution/src/resource_allocation.rs | 13 + nexus/reconfigurator/planning/Cargo.toml | 1 - .../planning/src/blueprint_builder.rs | 178 +-- nexus/reconfigurator/planning/src/example.rs | 147 +++ nexus/reconfigurator/planning/src/lib.rs | 1 + nexus/reconfigurator/planning/src/planner.rs | 24 +- .../src/app/background/blueprint_execution.rs | 5 +- nexus/src/app/background/blueprint_load.rs | 1 + nexus/src/app/deployment.rs | 16 +- nexus/src/app/external_endpoints.rs | 2 +- nexus/src/app/rack.rs | 2 +- nexus/src/app/silo.rs | 21 +- nexus/types/src/deployment.rs | 8 +- openapi/nexus-internal.json | 18 + schema/crdb/42.0.0/up1.sql | 8 + schema/crdb/42.0.0/up2.sql | 2 + schema/crdb/dbinit.sql | 6 +- 34 files changed, 1534 insertions(+), 320 deletions(-) create mode 100644 nexus/reconfigurator/execution/src/overridables.rs create mode 100644 nexus/reconfigurator/planning/src/example.rs create mode 100644 schema/crdb/42.0.0/up1.sql create mode 100644 schema/crdb/42.0.0/up2.sql diff --git a/Cargo.lock b/Cargo.lock index 
a45d47faf7..88c78a83d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4601,6 +4601,7 @@ dependencies = [ "nexus-inventory", "nexus-networking", "nexus-reconfigurator-planning", + "nexus-reconfigurator-preparation", "nexus-test-utils", "nexus-test-utils-macros", "nexus-types", diff --git a/clients/dns-service-client/src/diff.rs b/clients/dns-service-client/src/diff.rs index 39d51cc974..ce04319dff 100644 --- a/clients/dns-service-client/src/diff.rs +++ b/clients/dns-service-client/src/diff.rs @@ -59,8 +59,13 @@ impl<'a> DnsDiff<'a> { &self, ) -> impl Iterator { self.left.iter().filter_map(|(k, v1)| match self.right.get(k) { - Some(v2) if v1 != v2 => { - Some((k.as_ref(), v1.as_ref(), v2.as_ref())) + Some(v2) => { + let mut v1_sorted = v1.clone(); + let mut v2_sorted = v2.clone(); + v1_sorted.sort(); + v2_sorted.sort(); + (v1_sorted != v2_sorted) + .then(|| (k.as_ref(), v1.as_ref(), v2.as_ref())) } _ => None, }) diff --git a/clients/dns-service-client/src/lib.rs b/clients/dns-service-client/src/lib.rs index cd17a1559c..316c4787b0 100644 --- a/clients/dns-service-client/src/lib.rs +++ b/clients/dns-service-client/src/lib.rs @@ -109,3 +109,42 @@ impl types::DnsConfigParams { Ok(&self.zones[0]) } } + +impl Ord for types::DnsRecord { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + use types::DnsRecord; + match (self, other) { + // Same kinds: compare the items in them + (DnsRecord::A(addr1), DnsRecord::A(addr2)) => addr1.cmp(addr2), + (DnsRecord::Aaaa(addr1), DnsRecord::Aaaa(addr2)) => { + addr1.cmp(addr2) + } + (DnsRecord::Srv(srv1), DnsRecord::Srv(srv2)) => srv1 + .target + .cmp(&srv2.target) + .then_with(|| srv1.port.cmp(&srv2.port)), + + // Different kinds: define an arbitrary order among the kinds. + // We could use std::mem::discriminant() here but it'd be nice if + // this were stable over time. 
+ // We define (arbitrarily): A < Aaaa < Srv + (DnsRecord::A(_), DnsRecord::Aaaa(_) | DnsRecord::Srv(_)) => { + std::cmp::Ordering::Less + } + (DnsRecord::Aaaa(_), DnsRecord::Srv(_)) => std::cmp::Ordering::Less, + + // Anything else will result in "Greater". But let's be explicit. + (DnsRecord::Aaaa(_), DnsRecord::A(_)) + | (DnsRecord::Srv(_), DnsRecord::A(_)) + | (DnsRecord::Srv(_), DnsRecord::Aaaa(_)) => { + std::cmp::Ordering::Greater + } + } + } +} + +impl PartialOrd for types::DnsRecord { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 3d63f48469..26f2e07a41 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -1073,7 +1073,10 @@ async fn cmd_nexus_blueprints_generate_from_collection( ) .await .context("creating blueprint from collection id")?; - eprintln!("created blueprint {} from collection id", blueprint.id); + eprintln!( + "created blueprint {} from collection id {}", + blueprint.id, args.collection_id + ); Ok(()) } diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index fab4237ebd..82b71cff49 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -454,6 +454,7 @@ fn cmd_blueprint_from_inventory( let blueprint = BlueprintBuilder::build_initial_from_collection( collection, dns_version, + dns_version, &policy, creator, ) @@ -487,6 +488,7 @@ fn cmd_blueprint_plan( sim.log.clone(), parent_blueprint, dns_version, + dns_version, &policy, creator, collection, diff --git a/dev-tools/reconfigurator-cli/tests/input/complex.json b/dev-tools/reconfigurator-cli/tests/input/complex.json index 5bdba202d3..67f1881147 100644 --- a/dev-tools/reconfigurator-cli/tests/input/complex.json +++ b/dev-tools/reconfigurator-cli/tests/input/complex.json @@ -7973,6 +7973,7 @@ ], "parent_blueprint_id": null, 
"internal_dns_version": 1, + "external_dns_version": 1, "time_created": "2024-03-12T19:09:41.953854Z", "creator": "0d459323-e414-4f32-9944-76c7331da622", "comment": "from collection 3f61b723-cfe7-40ec-b22c-e3e1f35325f9" @@ -8645,9 +8646,10 @@ ], "parent_blueprint_id": null, "internal_dns_version": 1, + "external_dns_version": 1, "time_created": "2024-03-12T19:11:49.876621Z", "creator": "0d459323-e414-4f32-9944-76c7331da622", "comment": "from collection 5e1d6ae0-c5b4-4884-ac41-680cd4f5762d" } ] -} \ No newline at end of file +} diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index e9a650812b..e34148bede 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -25,6 +25,7 @@ pub struct Blueprint { pub id: Uuid, pub parent_blueprint_id: Option, pub internal_dns_version: Generation, + pub external_dns_version: Generation, pub time_created: DateTime, pub creator: String, pub comment: String, @@ -36,6 +37,7 @@ impl From<&'_ nexus_types::deployment::Blueprint> for Blueprint { id: bp.id, parent_blueprint_id: bp.parent_blueprint_id, internal_dns_version: Generation(bp.internal_dns_version), + external_dns_version: Generation(bp.external_dns_version), time_created: bp.time_created, creator: bp.creator.clone(), comment: bp.comment.clone(), @@ -49,6 +51,7 @@ impl From for nexus_types::deployment::BlueprintMetadata { id: value.id, parent_blueprint_id: value.parent_blueprint_id, internal_dns_version: *value.internal_dns_version, + external_dns_version: *value.external_dns_version, time_created: value.time_created, creator: value.creator, comment: value.comment, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 781f56fe0c..bcbf7fa88f 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(41, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(42, 0, 0); table! { disk (id) { @@ -1441,6 +1441,7 @@ table! { comment -> Text, internal_dns_version -> Int8, + external_dns_version -> Int8, } } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index c2b4e998ce..1a12b9d250 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -229,6 +229,7 @@ impl DataStore { let ( parent_blueprint_id, internal_dns_version, + external_dns_version, time_created, creator, comment, @@ -251,6 +252,7 @@ impl DataStore { ( blueprint.parent_blueprint_id, *blueprint.internal_dns_version, + *blueprint.external_dns_version, blueprint.time_created, blueprint.creator, blueprint.comment, @@ -480,6 +482,7 @@ impl DataStore { zones_in_service, parent_blueprint_id, internal_dns_version, + external_dns_version, time_created, creator, comment, @@ -1271,6 +1274,7 @@ mod tests { let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), + Generation::new(), &policy, "test", ) @@ -1305,6 +1309,7 @@ mod tests { let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), + Generation::new(), &EMPTY_POLICY, "test", ) @@ -1432,11 +1437,13 @@ mod tests { // Create a builder for a child blueprint. While we're at it, use a // different DNS version to test that that works. 
- let new_dns_version = blueprint1.internal_dns_version.next(); + let new_internal_dns_version = blueprint1.internal_dns_version.next(); + let new_external_dns_version = new_internal_dns_version.next(); let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, - new_dns_version, + new_internal_dns_version, + new_external_dns_version, &policy, "test", ) @@ -1488,7 +1495,8 @@ mod tests { .expect("failed to read collection back"); println!("diff: {}", blueprint2.diff_sleds(&blueprint_read).display()); assert_eq!(blueprint2, blueprint_read); - assert_eq!(blueprint2.internal_dns_version, new_dns_version); + assert_eq!(blueprint2.internal_dns_version, new_internal_dns_version); + assert_eq!(blueprint2.external_dns_version, new_external_dns_version); { let mut expected_ids = [blueprint1.id, blueprint2.id]; expected_ids.sort(); @@ -1581,6 +1589,7 @@ mod tests { let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), + Generation::new(), &EMPTY_POLICY, "test1", ) @@ -1589,6 +1598,7 @@ mod tests { &logctx.log, &blueprint1, Generation::new(), + Generation::new(), &EMPTY_POLICY, "test2", ) @@ -1598,6 +1608,7 @@ mod tests { &logctx.log, &blueprint1, Generation::new(), + Generation::new(), &EMPTY_POLICY, "test3", ) @@ -1695,6 +1706,7 @@ mod tests { &logctx.log, &blueprint3, Generation::new(), + Generation::new(), &EMPTY_POLICY, "test3", ) @@ -1734,6 +1746,7 @@ mod tests { let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), + Generation::new(), &EMPTY_POLICY, "test1", ) @@ -1742,6 +1755,7 @@ mod tests { &logctx.log, &blueprint1, Generation::new(), + Generation::new(), &EMPTY_POLICY, "test2", ) diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 3224f36b8d..fa4d6a4210 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -27,7 +27,6 @@ use crate::db::model::Rack; use 
crate::db::model::Zpool; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; -use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; @@ -44,6 +43,8 @@ use nexus_db_model::PasswordHashString; use nexus_db_model::SiloUser; use nexus_db_model::SiloUserPasswordHash; use nexus_db_model::SledUnderlaySubnetAllocation; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::OmicronZoneType; use nexus_types::external_api::params as external_params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::IdentityType; @@ -54,6 +55,7 @@ use nexus_types::internal_api::params as internal_params; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; @@ -793,60 +795,49 @@ impl DataStore { pub async fn nexus_external_addresses( &self, opctx: &OpContext, + blueprint: Option<&Blueprint>, ) -> Result<(Vec, Vec), Error> { opctx.authorize(authz::Action::Read, &authz::DNS_CONFIG).await?; - use crate::db::schema::external_ip::dsl as extip_dsl; - use crate::db::schema::service::dsl as service_dsl; - - let err = OptionalError::new(); - let conn = self.pool_connection_authorized(opctx).await?; - self.transaction_retry_wrapper("nexus_external_addresses") - .transaction(&conn, |conn| { - let err = err.clone(); - async move { - let ips = extip_dsl::external_ip - .inner_join( - service_dsl::service.on(service_dsl::id - .eq(extip_dsl::parent_id.assume_not_null())), - ) - .filter(extip_dsl::parent_id.is_not_null()) - .filter(extip_dsl::time_deleted.is_null()) - .filter(extip_dsl::is_service) - .filter( - service_dsl::kind.eq(db::model::ServiceKind::Nexus), - 
) - .select(ExternalIp::as_select()) - .get_results_async(&conn) - .await? - .into_iter() - .map(|external_ip| external_ip.ip.ip()) - .collect(); - - let dns_zones = self - .dns_zones_list_all_on_connection( - opctx, - &conn, - DnsGroup::External, - ) - .await - .map_err(|e| match e.retryable() { - NotRetryable(not_retryable_err) => { - err.bail(not_retryable_err) - } - Retryable(retryable_err) => retryable_err, - })?; - - Ok((ips, dns_zones)) - } - }) + let dns_zones = self + .dns_zones_list_all(opctx, DnsGroup::External) .await - .map_err(|e| { - if let Some(err) = err.take() { - return err.into(); - } - public_error_from_diesel(e, ErrorHandler::Server) - }) + .internal_context("listing DNS zones to list external addresses")?; + + let nexus_external_ips = if let Some(blueprint) = blueprint { + blueprint + .all_omicron_zones() + .filter_map(|(_, z)| match z.zone_type { + OmicronZoneType::Nexus { external_ip, .. } => { + Some(external_ip) + } + _ => None, + }) + .collect() + } else { + use crate::db::schema::external_ip::dsl as extip_dsl; + use crate::db::schema::service::dsl as service_dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + + extip_dsl::external_ip + .inner_join(service_dsl::service.on( + service_dsl::id.eq(extip_dsl::parent_id.assume_not_null()), + )) + .filter(extip_dsl::parent_id.is_not_null()) + .filter(extip_dsl::time_deleted.is_null()) + .filter(extip_dsl::is_service) + .filter(service_dsl::kind.eq(db::model::ServiceKind::Nexus)) + .select(ExternalIp::as_select()) + .get_results_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? 
+ .into_iter() + .map(|external_ip| external_ip.ip.ip()) + .collect() + }; + + Ok((nexus_external_ips, dns_zones)) } } diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index df10b1c072..0fd858b900 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -6,6 +6,7 @@ use super::dns::DnsVersionUpdateBuilder; use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; @@ -22,6 +23,7 @@ use crate::db::model::Name; use crate::db::model::Silo; use crate::db::model::VirtualProvisioningCollection; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; @@ -45,6 +47,7 @@ use ref_cast::RefCast; use uuid::Uuid; /// Filter a "silo_list" query based on silos' discoverability +#[derive(Debug, Clone, Copy)] pub enum Discoverability { /// Show all Silos All, @@ -351,6 +354,34 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// List all Silos, making as many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. 
+ pub async fn silo_list_all_batched( + &self, + opctx: &OpContext, + discoverability: Discoverability, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; + let mut all_silos = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .silos_list( + opctx, + &PaginatedBy::Id(p.current_pagparams()), + discoverability, + ) + .await?; + paginator = + p.found_batch(&batch, &|s: &nexus_db_model::Silo| s.id()); + all_silos.extend(batch); + } + Ok(all_silos) + } + pub async fn silo_delete( &self, opctx: &OpContext, diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 8c5d5f4f46..8be21348bf 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -1674,6 +1674,7 @@ mod tests { zones_in_service: BTreeSet::new(), parent_blueprint_id: None, internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), time_created: Utc::now(), creator: "test".to_string(), comment: "test".to_string(), @@ -1725,6 +1726,7 @@ mod tests { zones_in_service: BTreeSet::new(), parent_blueprint_id: Some(bp1_id), internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), time_created: Utc::now(), creator: "test".to_string(), comment: "test".to_string(), @@ -1785,6 +1787,7 @@ mod tests { zones_in_service: BTreeSet::new(), parent_blueprint_id: Some(bp2_id), internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), time_created: Utc::now(), creator: "test".to_string(), comment: "test".to_string(), @@ -1837,6 +1840,7 @@ mod tests { zones_in_service: BTreeSet::new(), parent_blueprint_id: Some(bp3_id), internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), time_created: Utc::now(), creator: "test".to_string(), comment: "test".to_string(), diff --git a/nexus/reconfigurator/execution/Cargo.toml 
b/nexus/reconfigurator/execution/Cargo.toml index d48f4c6b5b..b479ae67ee 100644 --- a/nexus/reconfigurator/execution/Cargo.toml +++ b/nexus/reconfigurator/execution/Cargo.toml @@ -9,6 +9,7 @@ omicron-rpaths.workspace = true [dependencies] anyhow.workspace = true dns-service-client.workspace = true +chrono.workspace = true futures.workspace = true illumos-utils.workspace = true internal-dns.workspace = true @@ -32,10 +33,10 @@ pq-sys = "*" omicron-workspace-hack.workspace = true [dev-dependencies] -chrono.workspace = true httptest.workspace = true ipnet.workspace = true nexus-reconfigurator-planning.workspace = true +nexus-reconfigurator-preparation.workspace = true nexus-inventory.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 0fa8eb1c10..9e0434c59f 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -2,37 +2,35 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Propagates internal DNS changes in a given blueprint +//! 
Propagates DNS changes in a given blueprint +use crate::overridables::Overridables; use crate::Sled; +use anyhow::Context; use dns_service_client::DnsDiff; use internal_dns::DnsConfigBuilder; use internal_dns::ServiceName; use nexus_db_model::DnsGroup; +use nexus_db_model::Silo; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; +use nexus_db_queries::db::fixed_data::silo::SILO_ID; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; use nexus_types::deployment::OmicronZoneType; +use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsConfigParams; -use omicron_common::address::get_switch_zone_address; -use omicron_common::address::CLICKHOUSE_KEEPER_PORT; -use omicron_common::address::CLICKHOUSE_PORT; -use omicron_common::address::COCKROACH_PORT; -use omicron_common::address::CRUCIBLE_PANTRY_PORT; -use omicron_common::address::CRUCIBLE_PORT; -use omicron_common::address::DENDRITE_PORT; -use omicron_common::address::DNS_HTTP_PORT; -use omicron_common::address::MGD_PORT; -use omicron_common::address::MGS_PORT; -use omicron_common::address::NEXUS_INTERNAL_PORT; -use omicron_common::address::NTP_PORT; -use omicron_common::address::OXIMETER_PORT; +use nexus_types::internal_api::params::DnsConfigZone; +use nexus_types::internal_api::params::DnsRecord; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::external::InternalContext; use slog::{debug, info, o}; use std::collections::BTreeMap; +use std::collections::HashMap; +use std::net::IpAddr; +use std::net::SocketAddrV6; use uuid::Uuid; pub(crate) async fn deploy_dns( @@ -41,12 +39,17 @@ pub(crate) async fn deploy_dns( creator: String, blueprint: &Blueprint, sleds_by_id: &BTreeMap, + overrides: &Overridables, ) -> Result<(), Error> { - // First, fetch the current DNS config. 
- let dns_config_current = datastore + // First, fetch the current DNS configs. + let internal_dns_config_current = datastore .dns_config_read(opctx, DnsGroup::Internal) .await - .internal_context("reading current DNS")?; + .internal_context("reading current DNS (internal)")?; + let external_dns_config_current = datastore + .dns_config_read(opctx, DnsGroup::External) + .await + .internal_context("reading current DNS (external)")?; // We could check here that the DNS version we found isn't newer than when // the blueprint was generated. But we have to check later when we try to @@ -55,18 +58,83 @@ pub(crate) async fn deploy_dns( // we know it's being hit when we exercise this condition. // Next, construct the DNS config represented by the blueprint. - let dns_config_blueprint = blueprint_dns_config(blueprint, sleds_by_id); + let internal_dns_config_blueprint = + blueprint_internal_dns_config(blueprint, sleds_by_id, overrides) + .map_err(|e| { + Error::internal_error(&format!( + "error constructing internal DNS config: {:#}", + e + )) + })?; + let silos = datastore + .silo_list_all_batched(opctx, Discoverability::All) + .await + .internal_context("listing Silos (for configuring external DNS)")? + .into_iter() + // We do not generate a DNS name for the "default" Silo. + .filter(|silo| silo.id() != *SILO_ID) + .collect::>(); + + let (nexus_external_ips, nexus_external_dns_zones) = + datastore.nexus_external_addresses(opctx, Some(blueprint)).await?; + let nexus_external_dns_zone_names = nexus_external_dns_zones + .into_iter() + .map(|z| z.zone_name) + .collect::>(); + let external_dns_config_blueprint = blueprint_external_dns_config( + blueprint, + &nexus_external_ips, + &silos, + &nexus_external_dns_zone_names, + ); + + // Deploy the changes. 
+ deploy_dns_one( + opctx, + datastore, + creator.clone(), + blueprint, + &internal_dns_config_current, + &internal_dns_config_blueprint, + DnsGroup::Internal, + ) + .await?; + deploy_dns_one( + opctx, + datastore, + creator, + blueprint, + &external_dns_config_current, + &external_dns_config_blueprint, + DnsGroup::External, + ) + .await?; + Ok(()) +} + +pub(crate) async fn deploy_dns_one( + opctx: &OpContext, + datastore: &DataStore, + creator: String, + blueprint: &Blueprint, + dns_config_current: &DnsConfigParams, + dns_config_blueprint: &DnsConfigParams, + dns_group: DnsGroup, +) -> Result<(), Error> { + let log = opctx + .log + .new(o!("blueprint_execution" => format!("dns {:?}", dns_group))); // Looking at the current contents of DNS, prepare an update that will make // it match what it should be. - let log = opctx.log.new(o!("blueprint_execution" => "DNS")); let comment = format!("blueprint {} ({})", blueprint.id, blueprint.comment); let maybe_update = dns_compute_update( &log, + dns_group, comment, creator, - &dns_config_current, - &dns_config_blueprint, + dns_config_current, + dns_config_blueprint, )?; let Some(update) = maybe_update else { // Nothing to do. @@ -81,12 +149,11 @@ pub(crate) async fn deploy_dns( // executing a newer target blueprint. // // To avoid this problem, before generating a blueprint, Nexus fetches the - // current internal DNS generation and stores that into the blueprint - // itself. Here, when we execute the blueprint, we make our database update - // conditional on that still being the current internal DNS generation. - // If some other instance has already come along and updated the database, - // whether for this same blueprint or a newer one, our attempt to update the - // database will fail. + // current DNS generation and stores that into the blueprint itself. Here, + // when we execute the blueprint, we make our database update conditional on + // that still being the current DNS generation. 
If some other instance has + // already come along and updated the database, whether for this same + // blueprint or a newer one, our attempt to update the database will fail. // // Let's look at a tricky example. Suppose: // @@ -100,7 +167,7 @@ pub(crate) async fn deploy_dns( // that's still the current version in DNS. B3 is made the current // target. // - // Assume B2 and B3 specify different internal DNS contents (e.g., have a + // Assume B2 and B3 specify different DNS contents (e.g., have a // different set of Omicron zones in them). // // 4. Nexus instance N1 finds B2 to be the current target and starts @@ -121,21 +188,21 @@ pub(crate) async fn deploy_dns( // Now, one of two things could happen: // // 1. N1 wins. Its database update applies successfully. In the database, - // the internal DNS version becomes version 4. In this case, N2 loses. - // Its database operation fails altogether. At this point, any - // subsequent attempt to execute blueprint B3 will fail because any DNS - // update will be conditional on the database having version 3. The only - // way out of this is for the planner to generate a new blueprint B4 - // that's exactly equivalent to B3 except that the stored internal DNS - // version is 4. Then we'll be able to execute that. + // the DNS version becomes version 4. In this case, N2 loses. Its + // database operation fails altogether. At this point, any subsequent + // attempt to execute blueprint B3 will fail because any DNS update will + // be conditional on the database having version 3. The only way out of + // this is for the planner to generate a new blueprint B4 that's exactly + // equivalent to B3 except that the stored DNS version is 4. Then we'll + // be able to execute that. // // 2. N2 wins. Its database update applies successfully. In the database, - // the internal DNS version becomes version 4. In this case, N1 loses. - // Its database operation fails altogether. 
At this point, any - // subsequent attempt to execute blueprint B3 will fail because any DNS - // update will be conditional on the databae having version 3. No - // further action is needed, though, because we've successfully executed - // the latest target blueprint. + // the DNS version becomes version 4. In this case, N1 loses. Its + // database operation fails altogether. At this point, any subsequent + // attempt to execute blueprint B3 will fail because any DNS update will + // be conditional on the databae having version 3. No further action is + // needed, though, because we've successfully executed the latest target + // blueprint. // // In both cases, the system will (1) converge to having successfully // executed the target blueprint, and (2) never have rolled any changes back @@ -149,7 +216,7 @@ pub(crate) async fn deploy_dns( let generation_u32 = u32::try_from(dns_config_current.generation).map_err(|e| { Error::internal_error(&format!( - "internal DNS generation got too large: {}", + "DNS generation got too large: {}", e, )) })?; @@ -159,10 +226,11 @@ pub(crate) async fn deploy_dns( } /// Returns the expected contents of internal DNS based on the given blueprint -pub fn blueprint_dns_config( +pub fn blueprint_internal_dns_config( blueprint: &Blueprint, sleds_by_id: &BTreeMap, -) -> DnsConfigParams { + overrides: &Overridables, +) -> Result { // The DNS names configured here should match what RSS configures for the // same zones. It's tricky to have RSS share the same code because it uses // Sled Agent's _internal_ `OmicronZoneConfig` (and friends), whereas we're @@ -171,52 +239,72 @@ pub fn blueprint_dns_config( // the details. let mut dns_builder = DnsConfigBuilder::new(); - // The code below assumes that all zones are using the default port numbers. - // That should be true, as those are the only ports ever used today. - // In an ideal world, the correct port would be pulled out of the - // `OmicronZoneType` variant instead. 
Although that information is present, - // it's irritatingly non-trivial to do right now because SocketAddrs are - // represented as strings, so we'd need to parse all of them and handle all - // the errors, even though they should never happen. - // See oxidecomputer/omicron#4988. + // It's annoying that we have to parse this because it really should be + // valid already. See oxidecomputer/omicron#4988. + fn parse_port(address: &str) -> Result { + address + .parse::() + .with_context(|| format!("parsing socket address {:?}", address)) + .map(|addr| addr.port()) + } + for (_, omicron_zone) in blueprint.all_omicron_zones() { if !blueprint.zones_in_service.contains(&omicron_zone.id) { continue; } - let (service_name, port) = match omicron_zone.zone_type { - OmicronZoneType::BoundaryNtp { .. } => { - (ServiceName::BoundaryNtp, NTP_PORT) + let context = || { + format!( + "parsing {} zone with id {}", + omicron_zone.zone_type.label(), + omicron_zone.id + ) + }; + let (service_name, port) = match &omicron_zone.zone_type { + OmicronZoneType::BoundaryNtp { address, .. } => { + let port = parse_port(&address).with_context(context)?; + (ServiceName::BoundaryNtp, port) } - OmicronZoneType::InternalNtp { .. } => { - (ServiceName::InternalNtp, NTP_PORT) + OmicronZoneType::InternalNtp { address, .. } => { + let port = parse_port(&address).with_context(context)?; + (ServiceName::InternalNtp, port) } - OmicronZoneType::Clickhouse { .. } => { - (ServiceName::Clickhouse, CLICKHOUSE_PORT) + OmicronZoneType::Clickhouse { address, .. } => { + let port = parse_port(&address).with_context(context)?; + (ServiceName::Clickhouse, port) } - OmicronZoneType::ClickhouseKeeper { .. } => { - (ServiceName::ClickhouseKeeper, CLICKHOUSE_KEEPER_PORT) + OmicronZoneType::ClickhouseKeeper { address, .. } => { + let port = parse_port(&address).with_context(context)?; + (ServiceName::ClickhouseKeeper, port) } - OmicronZoneType::CockroachDb { .. 
} => { - (ServiceName::Cockroach, COCKROACH_PORT) + OmicronZoneType::CockroachDb { address, .. } => { + let port = parse_port(&address).with_context(context)?; + (ServiceName::Cockroach, port) } - OmicronZoneType::Nexus { .. } => { - (ServiceName::Nexus, NEXUS_INTERNAL_PORT) + OmicronZoneType::Nexus { internal_address, .. } => { + let port = + parse_port(internal_address).with_context(context)?; + (ServiceName::Nexus, port) } - OmicronZoneType::Crucible { .. } => { - (ServiceName::Crucible(omicron_zone.id), CRUCIBLE_PORT) + OmicronZoneType::Crucible { address, .. } => { + let port = parse_port(address).with_context(context)?; + (ServiceName::Crucible(omicron_zone.id), port) } - OmicronZoneType::CruciblePantry { .. } => { - (ServiceName::CruciblePantry, CRUCIBLE_PANTRY_PORT) + OmicronZoneType::CruciblePantry { address } => { + let port = parse_port(address).with_context(context)?; + (ServiceName::CruciblePantry, port) } - OmicronZoneType::Oximeter { .. } => { - (ServiceName::Oximeter, OXIMETER_PORT) + OmicronZoneType::Oximeter { address } => { + let port = parse_port(address).with_context(context)?; + (ServiceName::Oximeter, port) } - OmicronZoneType::ExternalDns { .. } => { - (ServiceName::ExternalDns, DNS_HTTP_PORT) + OmicronZoneType::ExternalDns { http_address, .. } => { + let port = parse_port(http_address).with_context(context)?; + (ServiceName::ExternalDns, port) } - OmicronZoneType::InternalDns { .. } => { - (ServiceName::InternalDns, DNS_HTTP_PORT) + OmicronZoneType::InternalDns { http_address, .. } => { + let port = parse_port(http_address).with_context(context)?; + (ServiceName::InternalDns, port) } }; @@ -235,15 +323,15 @@ pub fn blueprint_dns_config( let scrimlets = sleds_by_id.values().filter(|sled| sled.is_scrimlet); for scrimlet in scrimlets { let sled_subnet = scrimlet.subnet(); - let switch_zone_ip = get_switch_zone_address(sled_subnet); + let switch_zone_ip = overrides.switch_zone_ip(scrimlet.id, sled_subnet); // unwrap(): see above. 
dns_builder .host_zone_switch( scrimlet.id, switch_zone_ip, - DENDRITE_PORT, - MGS_PORT, - MGD_PORT, + overrides.dendrite_port(scrimlet.id), + overrides.mgs_port(scrimlet.id), + overrides.mgd_port(scrimlet.id), ) .unwrap(); } @@ -252,18 +340,52 @@ pub fn blueprint_dns_config( // whatever it was when this blueprint was generated. This will only be // used if the generated DNS contents are different from what's current. dns_builder.generation(blueprint.internal_dns_version.next()); - dns_builder.build() + Ok(dns_builder.build()) +} + +pub fn blueprint_external_dns_config( + blueprint: &Blueprint, + nexus_external_ips: &[IpAddr], + silos: &[Silo], + external_dns_zone_names: &[String], +) -> DnsConfigParams { + let dns_records: Vec = nexus_external_ips + .into_iter() + .map(|addr| match addr { + IpAddr::V4(addr) => DnsRecord::A(*addr), + IpAddr::V6(addr) => DnsRecord::Aaaa(*addr), + }) + .collect(); + + let records = silos + .into_iter() + .map(|silo| (silo_dns_name(&silo.name()), dns_records.clone())) + .collect::>>(); + + let zones = external_dns_zone_names + .into_iter() + .map(|zone_name| DnsConfigZone { + zone_name: zone_name.to_owned(), + records: records.clone(), + }) + .collect(); + + DnsConfigParams { + generation: u64::from(blueprint.external_dns_version.next()), + time_created: chrono::Utc::now(), + zones, + } } fn dns_compute_update( log: &slog::Logger, + dns_group: DnsGroup, comment: String, creator: String, current_config: &DnsConfigParams, new_config: &DnsConfigParams, ) -> Result, Error> { - let mut update = - DnsVersionUpdateBuilder::new(DnsGroup::Internal, comment, creator); + let mut update = DnsVersionUpdateBuilder::new(dns_group, comment, creator); let diff = DnsDiff::new(¤t_config, &new_config) .map_err(|e| Error::internal_error(&format!("{:#}", e)))?; @@ -276,7 +398,7 @@ fn dns_compute_update( debug!( log, "adding name"; - "name" => name, + "dns_name" => name, "new_records" => ?new_records, ); update.add_name( @@ -289,7 +411,7 @@ fn 
dns_compute_update( debug!( log, "removing name"; - "name" => name, + "dns_name" => name, "old_records" => ?old_records, ); update.remove_name(name.to_string())?; @@ -299,7 +421,7 @@ fn dns_compute_update( debug!( log, "updating name"; - "name" => name, + "dns_name" => name, "old_records" => ?old_records, "new_records" => ?new_records, ); @@ -313,42 +435,83 @@ fn dns_compute_update( Ok(Some(update)) } +/// Returns the (relative) DNS name for this Silo's API and console endpoints +/// _within_ the external DNS zone (i.e., without that zone's suffix) +/// +/// This specific naming scheme is determined under RFD 357. +pub fn silo_dns_name(name: &omicron_common::api::external::Name) -> String { + // RFD 4 constrains resource names (including Silo names) to DNS-safe + // strings, which is why it's safe to directly put the name of the + // resource into the DNS name rather than doing any kind of escaping. + format!("{}.sys", name) +} + #[cfg(test)] mod test { - use super::blueprint_dns_config; - use super::dns_compute_update; + use super::*; + use crate::overridables::Overridables; use crate::Sled; + use dns_service_client::DnsDiff; use internal_dns::ServiceName; use internal_dns::DNS_ZONE; + use nexus_db_model::DnsGroup; + use nexus_db_model::Silo; + use nexus_db_queries::authn; + use nexus_db_queries::authz; + use nexus_db_queries::context::OpContext; + use nexus_db_queries::db::DataStore; use nexus_inventory::CollectionBuilder; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; + use nexus_reconfigurator_planning::example::example; + use nexus_reconfigurator_preparation::policy_from_db; + use nexus_test_utils::resource_helpers::create_silo; + use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::Blueprint; + use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneType; use 
nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; + use nexus_types::external_api::params; + use nexus_types::external_api::shared; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; + use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::internal_api::params::DnsConfigZone; use nexus_types::internal_api::params::DnsRecord; + use nexus_types::internal_api::params::Srv; use omicron_common::address::get_sled_address; use omicron_common::address::get_switch_zone_address; + use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; + use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; + use omicron_common::api::external::Error; use omicron_common::api::external::Generation; + use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_test_utils::dev::poll::wait_for_condition; + use omicron_test_utils::dev::poll::CondCheckError; use omicron_test_utils::dev::test_setup_log; + use slog::{debug, info}; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; + use std::net::IpAddr; use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::str::FromStr; + use std::sync::Arc; + use std::time::Duration; use uuid::Uuid; + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + fn blueprint_empty() -> Blueprint { let builder = CollectionBuilder::new("test-suite"); let collection = builder.build(); @@ -360,6 +523,7 @@ mod test { BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), + Generation::new(), &policy, "test-suite", ) @@ -377,11 +541,16 @@ mod test { } } - /// test blueprint_dns_config(): trivial case of an empty blueprint + /// test blueprint_internal_dns_config(): trivial 
case of an empty blueprint #[test] - fn test_blueprint_dns_empty() { + fn test_blueprint_internal_dns_empty() { let blueprint = blueprint_empty(); - let blueprint_dns = blueprint_dns_config(&blueprint, &BTreeMap::new()); + let blueprint_dns = blueprint_internal_dns_config( + &blueprint, + &BTreeMap::new(), + &Default::default(), + ) + .unwrap(); assert!(blueprint_dns.sole_zone().unwrap().records.is_empty()); } @@ -389,7 +558,7 @@ mod test { /// - one of each type of zone in service /// - some zones not in service #[test] - fn test_blueprint_dns_basic() { + fn test_blueprint_internal_dns_basic() { // We'll use the standard representative inventory collection to build a // blueprint. The main thing we care about here is that we have at // least one zone of each type. Later, we'll mark a couple of the sleds @@ -434,6 +603,7 @@ mod test { let mut blueprint = BlueprintBuilder::build_initial_from_collection( &collection, initial_dns_generation, + Generation::new(), &policy, "test-suite", ) @@ -477,8 +647,12 @@ mod test { }) .collect(); - let dns_config_blueprint = - blueprint_dns_config(&blueprint, &sleds_by_id); + let dns_config_blueprint = blueprint_internal_dns_config( + &blueprint, + &sleds_by_id, + &Default::default(), + ) + .unwrap(); assert_eq!( dns_config_blueprint.generation, u64::from(initial_dns_generation.next()) @@ -655,6 +829,143 @@ mod test { assert!(srv_kinds_expected.is_empty()); } + #[tokio::test] + async fn test_blueprint_external_dns_basic() { + static TEST_NAME: &str = "test_blueprint_external_dns_basic"; + let logctx = test_setup_log(TEST_NAME); + let (collection, policy) = example(&logctx.log, TEST_NAME, 5); + let initial_external_dns_generation = Generation::new(); + let blueprint = BlueprintBuilder::build_initial_from_collection( + &collection, + Generation::new(), + initial_external_dns_generation, + &policy, + "test suite", + ) + .expect("failed to generate initial blueprint"); + + let my_silo = Silo::new(params::SiloCreate { + identity: 
IdentityMetadataCreateParams { + name: "my-silo".parse().unwrap(), + description: String::new(), + }, + quotas: params::SiloQuotasCreate::empty(), + discoverable: false, + identity_mode: shared::SiloIdentityMode::SamlJit, + admin_group_name: None, + tls_certificates: vec![], + mapped_fleet_roles: Default::default(), + }) + .unwrap(); + + let nexus_external_ips: Vec<_> = blueprint + .all_omicron_zones() + .filter_map(|(_, z)| match &z.zone_type { + OmicronZoneType::Nexus { external_ip, .. } => { + Some(*external_ip) + } + _ => None, + }) + .collect(); + + // It shouldn't ever be possible to have no Silos at all, but at least + // make sure we don't panic. + let external_dns_config = blueprint_external_dns_config( + &blueprint, + &nexus_external_ips, + &[], + &[String::from("oxide.test")], + ); + assert_eq!( + external_dns_config.generation, + u64::from(initial_external_dns_generation.next()) + ); + assert_eq!(external_dns_config.zones.len(), 1); + assert_eq!(external_dns_config.zones[0].zone_name, "oxide.test"); + assert!(external_dns_config.zones[0].records.is_empty()); + + // Same with external DNS zones. + let external_dns_config = blueprint_external_dns_config( + &blueprint, + &nexus_external_ips, + std::slice::from_ref(&my_silo), + &[], + ); + assert_eq!( + external_dns_config.generation, + u64::from(initial_external_dns_generation.next()) + ); + assert!(external_dns_config.zones.is_empty()); + + // Same with external IPs. + let external_dns_config = blueprint_external_dns_config( + &blueprint, + &[], + std::slice::from_ref(&my_silo), + &[String::from("oxide.test")], + ); + assert_eq!( + external_dns_config.generation, + u64::from(initial_external_dns_generation.next()) + ); + + // Now check a more typical case. (Although we wouldn't normally have + // more than one external DNS zone, it's a more general case and pretty + // easy to test.) 
+ let external_dns_config = blueprint_external_dns_config( + &blueprint, + &nexus_external_ips, + std::slice::from_ref(&my_silo), + &[String::from("oxide1.test"), String::from("oxide2.test")], + ); + assert_eq!( + external_dns_config.generation, + u64::from(initial_external_dns_generation.next()) + ); + assert_eq!(external_dns_config.zones.len(), 2); + assert_eq!( + external_dns_config.zones[0].records, + external_dns_config.zones[1].records + ); + assert_eq!( + external_dns_config.zones[0].zone_name, + String::from("oxide1.test"), + ); + assert_eq!( + external_dns_config.zones[1].zone_name, + String::from("oxide2.test"), + ); + let records = &external_dns_config.zones[0].records; + assert_eq!(records.len(), 1); + let silo_records = records + .get(&silo_dns_name(my_silo.name())) + .expect("missing silo DNS records"); + + // Here we're hardcoding the contents of the example blueprint. It + // currently puts one Nexus zone on each sled. If we change the example + // blueprint, change the expected set of IPs here. 
+ let mut silo_record_ips: Vec<_> = silo_records + .into_iter() + .map(|record| match record { + DnsRecord::A(v) => IpAddr::V4(*v), + DnsRecord::Aaaa(v) => IpAddr::V6(*v), + DnsRecord::Srv(_) => panic!("unexpected SRV record"), + }) + .collect(); + silo_record_ips.sort(); + assert_eq!( + silo_record_ips, + &[ + "192.0.2.2".parse::().unwrap(), + "192.0.2.3".parse::().unwrap(), + "192.0.2.4".parse::().unwrap(), + "192.0.2.5".parse::().unwrap(), + "192.0.2.6".parse::().unwrap(), + ] + ); + logctx.cleanup_successful(); + } + #[test] fn test_dns_compute_update() { let logctx = test_setup_log("dns_compute_update"); @@ -664,6 +975,7 @@ mod test { let dns_empty = dns_config_empty(); match dns_compute_update( &logctx.log, + DnsGroup::Internal, "test-suite".to_string(), "test-suite".to_string(), &dns_empty, @@ -717,6 +1029,7 @@ mod test { let update = dns_compute_update( &logctx.log, + DnsGroup::Internal, "test-suite".to_string(), "test-suite".to_string(), &dns_config1, @@ -742,6 +1055,544 @@ mod test { ] ); + // Test the difference between two configs whose SRV records differ. + let mut dns_config1 = dns_config1.clone(); + dns_config1.zones[0].records.insert( + String::from("_nexus._tcp"), + vec![ + DnsRecord::Srv(Srv { + port: 123, + prio: 1, + target: String::from("ex1.my-zone"), + weight: 2, + }), + DnsRecord::Srv(Srv { + port: 123, + prio: 1, + target: String::from("ex2.my-zone"), + weight: 2, + }), + ], + ); + // A clone of the same one should of course be the same as the original. + let mut dns_config2 = dns_config1.clone(); + let update = dns_compute_update( + &logctx.log, + DnsGroup::Internal, + "test-suite".to_string(), + "test-suite".to_string(), + &dns_config1, + &dns_config2, + ) + .expect("failed to compute update"); + assert!(update.is_none()); + + // If we shift the order of the items, it should still reflect no + // changes. 
+ let records = + dns_config2.zones[0].records.get_mut("_nexus._tcp").unwrap(); + records.rotate_left(1); + assert!( + records != dns_config1.zones[0].records.get("_nexus._tcp").unwrap() + ); + let update = dns_compute_update( + &logctx.log, + DnsGroup::Internal, + "test-suite".to_string(), + "test-suite".to_string(), + &dns_config1, + &dns_config2, + ) + .expect("failed to compute update"); + assert!(update.is_none()); + + // If we add another record, there should indeed be a new update. + let records = + dns_config2.zones[0].records.get_mut("_nexus._tcp").unwrap(); + records.push(DnsRecord::Srv(Srv { + port: 123, + prio: 1, + target: String::from("ex3.my-zone"), + weight: 2, + })); + let final_records = records.clone(); + + let update = dns_compute_update( + &logctx.log, + DnsGroup::Internal, + "test-suite".to_string(), + "test-suite".to_string(), + &dns_config1, + &dns_config2, + ) + .expect("failed to compute update") + .expect("expected an update"); + + assert_eq!( + update.names_removed().collect::>(), + &["_nexus._tcp"] + ); + assert_eq!( + update.names_added().collect::>(), + &[("_nexus._tcp", final_records.as_slice())] + ); + logctx.cleanup_successful(); } + + // Tests end-to-end DNS behavior: + // + // - If we create a blueprint matching the current system, and then apply + // it, there are no changes to either internal or external DNS + // + // - If we create a Silo, DNS will be updated. If we then re-execute the + // previous blueprint, again, there will be no new changes to DNS. + // + // - If we then generate a blueprint with a Nexus zone and execute the DNS + // part of that, then: + // + // - internal DNS SRV record for _nexus._tcp is added + // - internal DNS AAAA record for the new zone is added + // - external DNS gets a A record for the new zone's external IP + // + // - If we subsequently create a new Silo, the new Silo's DNS record + // reflects the Nexus zone that was added. 
+ #[nexus_test] + async fn test_silos_external_dns_end_to_end( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let log = &cptestctx.logctx.log; + let opctx = OpContext::for_background( + log.clone(), + Arc::new(authz::Authz::new(log)), + authn::Context::internal_api(), + datastore.clone(), + ); + + // First, wait until Nexus has successfully completed an inventory + // collection. + let collection = wait_for_condition( + || async { + let result = + datastore.inventory_get_latest_collection(&opctx).await; + let log_result = match &result { + Ok(Some(_)) => Ok("found"), + Ok(None) => Ok("not found"), + Err(error) => Err(error), + }; + debug!( + log, + "attempt to fetch latest inventory collection"; + "result" => ?log_result, + ); + + match result { + Ok(None) => Err(CondCheckError::NotYet), + Ok(Some(c)) => Ok(c), + Err(Error::ServiceUnavailable { .. }) => { + Err(CondCheckError::NotYet) + } + Err(error) => Err(CondCheckError::Failed(error)), + } + }, + &Duration::from_millis(50), + &Duration::from_secs(30), + ) + .await + .expect("expected to find inventory collection"); + + // Fetch the initial contents of internal and external DNS. + let dns_initial_internal = datastore + .dns_config_read(&opctx, DnsGroup::Internal) + .await + .expect("fetching initial internal DNS"); + let dns_initial_external = datastore + .dns_config_read(&opctx, DnsGroup::External) + .await + .expect("fetching initial external DNS"); + + // Now, use the collection to construct an initial blueprint. + // This stores it into the database, too. + info!(log, "using collection"; "collection_id" => %collection.id); + let blueprint = nexus + .blueprint_generate_from_collection(&opctx, collection.id) + .await + .expect("failed to generate initial blueprint"); + eprintln!("blueprint: {:?}", blueprint); + + // Set it as the current target. We'll need this later. 
+ datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("failed to set blueprint as target"); + + // Now, execute the blueprint. + let overrides = Overridables::for_test(cptestctx); + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + &blueprint, + "test-suite", + &overrides, + ) + .await + .expect("failed to execute initial blueprint"); + + // DNS ought not to have changed. + verify_dns_unchanged( + &opctx, + datastore, + &dns_initial_internal, + &dns_initial_external, + ) + .await; + + // Create a Silo. Make sure that external DNS is updated (and that + // internal DNS is not). Then make sure that if we execute the same + // blueprint again, DNS does not change again (i.e., that it does not + // revert somehow). + let dns_latest_external = create_silo_and_verify_dns( + cptestctx, + &opctx, + datastore, + &blueprint, + &overrides, + "squidport", + &dns_initial_internal, + &dns_initial_external, + ) + .await; + + // Now, go through the motions of provisioning a new Nexus zone. + // We do this directly with BlueprintBuilder to avoid the planner + // deciding to make other unrelated changes. + let sled_rows = datastore.sled_list_all_batched(&opctx).await.unwrap(); + let zpool_rows = + datastore.zpool_list_all_external_batched(&opctx).await.unwrap(); + let ip_pool_range_rows = { + let (authz_service_ip_pool, _) = + datastore.ip_pools_service_lookup(&opctx).await.unwrap(); + datastore + .ip_pool_list_ranges_batched(&opctx, &authz_service_ip_pool) + .await + .unwrap() + }; + let mut policy = policy_from_db( + &sled_rows, + &zpool_rows, + &ip_pool_range_rows, + // This is not used because we're not actually going through the + // planner. + NEXUS_REDUNDANCY, + ) + .unwrap(); + // We'll need another (fake) external IP for this new Nexus. 
+ policy + .service_ip_pool_ranges + .push(IpRange::from(IpAddr::V4(Ipv4Addr::LOCALHOST))); + let mut builder = BlueprintBuilder::new_based_on( + &log, + &blueprint, + Generation::from( + u32::try_from(dns_initial_internal.generation).unwrap(), + ), + Generation::from( + u32::try_from(dns_latest_external.generation).unwrap(), + ), + &policy, + "test suite", + ) + .unwrap(); + let sled_id = + blueprint.sleds().next().expect("expected at least one sled"); + let nalready = builder.sled_num_nexus_zones(sled_id); + let rv = builder + .sled_ensure_zone_multiple_nexus(sled_id, nalready + 1) + .unwrap(); + assert_eq!(rv, EnsureMultiple::Added(1)); + let blueprint2 = builder.build(); + eprintln!("blueprint2: {:?}", blueprint2); + // Figure out the id of the new zone. + let zones_before = blueprint + .all_omicron_zones() + .filter_map(|(_, z)| z.zone_type.is_nexus().then_some(z.id)) + .collect::>(); + let zones_after = blueprint2 + .all_omicron_zones() + .filter_map(|(_, z)| z.zone_type.is_nexus().then_some(z.id)) + .collect::>(); + let new_zones: Vec<_> = zones_after.difference(&zones_before).collect(); + assert_eq!(new_zones.len(), 1); + let new_zone_id = *new_zones[0]; + + // Set this blueprint as the current target. We set it to disabled + // because we're controlling the execution directly here. But we need + // to do this so that silo creation sees the change. + datastore + .blueprint_insert(&opctx, &blueprint2) + .await + .expect("failed to save blueprint to database"); + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: blueprint2.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("failed to set blueprint as target"); + + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + &blueprint2, + "test-suite", + &overrides, + ) + .await + .expect("failed to execute second blueprint"); + + // Now fetch DNS again. Both should have changed this time. 
+ let dns_latest_internal = datastore + .dns_config_read(&opctx, DnsGroup::Internal) + .await + .expect("fetching latest internal DNS"); + + assert_eq!( + dns_latest_internal.generation, + dns_initial_internal.generation + 1, + ); + + let diff = + DnsDiff::new(&dns_initial_internal, &dns_latest_internal).unwrap(); + // There should be one new AAAA record for the zone itself. + let new_records: Vec<_> = diff.names_added().collect(); + let (new_name, &[DnsRecord::Aaaa(_)]) = new_records[0] else { + panic!("did not find expected AAAA record for new Nexus zone"); + }; + let new_zone_host = internal_dns::config::Host::for_zone( + new_zone_id, + internal_dns::config::ZoneVariant::Other, + ); + assert!(new_zone_host.fqdn().starts_with(new_name)); + + // Nothing was removed. + assert!(diff.names_removed().next().is_none()); + + // The SRV record for Nexus itself ought to have changed, growing one + // more record -- for the new AAAA record above. + let changed: Vec<_> = diff.names_changed().collect(); + assert_eq!(changed.len(), 1); + let (name, old_records, new_records) = changed[0]; + assert_eq!(name, ServiceName::Nexus.dns_name()); + let new_srv = subset_plus_one(old_records, new_records); + let DnsRecord::Srv(new_srv) = new_srv else { + panic!("expected SRV record, found {:?}", new_srv); + }; + assert_eq!(new_srv.target, new_zone_host.fqdn()); + + // As for external DNS: all existing names ought to have been changed, + // gaining a new A record for the new host. 
+ let dns_previous_external = dns_latest_external; + let dns_latest_external = datastore + .dns_config_read(&opctx, DnsGroup::External) + .await + .expect("fetching latest external DNS"); + assert_eq!( + dns_latest_external.generation, + dns_previous_external.generation + 1, + ); + let diff = + DnsDiff::new(&dns_previous_external, &dns_latest_external).unwrap(); + assert!(diff.names_added().next().is_none()); + assert!(diff.names_removed().next().is_none()); + let changed: Vec<_> = diff.names_changed().collect(); + for (name, old_records, new_records) in changed { + // These are Silo names and end with ".sys". + assert!(name.ends_with(".sys")); + // We can't really tell which one points to what, especially in the + // test suite where all Nexus zones use localhost for their external + // IP. All we can tell is that there's one new one. + assert_eq!(old_records.len() + 1, new_records.len()); + } + + // If we execute it again, we should see no more changes. + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + &blueprint2, + "test-suite", + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); + verify_dns_unchanged( + &opctx, + datastore, + &dns_latest_internal, + &dns_latest_external, + ) + .await; + + // Now create another Silo and verify the changes to DNS. + // This ensures that the "create Silo" path picks up Nexus instances + // that exist only in Reconfigurator, not the services table. + let dns_latest_external = create_silo_and_verify_dns( + &cptestctx, + &opctx, + datastore, + &blueprint2, + &overrides, + "tickety-boo", + &dns_latest_internal, + &dns_latest_external, + ) + .await; + + // One more time, make sure that executing the blueprint does not do + // anything. 
+ crate::realize_blueprint_with_overrides( + &opctx, + datastore, + &blueprint2, + "test-suite", + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); + verify_dns_unchanged( + &opctx, + datastore, + &dns_latest_internal, + &dns_latest_external, + ) + .await; + } + + fn subset_plus_one<'a, T: std::fmt::Debug + Ord + Eq>( + list1: &'a [T], + list2: &'a [T], + ) -> &'a T { + let set: BTreeSet<_> = list1.into_iter().collect(); + let mut extra = Vec::with_capacity(1); + for item in list2 { + if !set.contains(&item) { + extra.push(item); + } + } + + if extra.len() != 1 { + panic!( + "expected list2 to have one extra element:\n\ + list1: {:?}\n\ + list2: {:?}\n + extra: {:?}\n", + list1, list2, extra + ); + } + + extra.into_iter().next().unwrap() + } + + #[allow(clippy::too_many_arguments)] + async fn create_silo_and_verify_dns( + cptestctx: &ControlPlaneTestContext, + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, + overrides: &Overridables, + silo_name: &str, + old_internal: &DnsConfigParams, + old_external: &DnsConfigParams, + ) -> DnsConfigParams { + // Create a Silo. Make sure that external DNS is updated (and that + // internal DNS is not). This is tested elsewhere already but really we + // want to make sure that if we then execute the blueprint again, DNS + // does not change _again_ (i.e., does not somehow revert). 
+ let silo = create_silo( + &cptestctx.external_client, + silo_name, + false, + shared::SiloIdentityMode::SamlJit, + ) + .await; + + let dns_latest_internal = datastore + .dns_config_read(&opctx, DnsGroup::Internal) + .await + .expect("fetching latest internal DNS"); + assert_eq!(old_internal.generation, dns_latest_internal.generation); + let dns_latest_external = datastore + .dns_config_read(&opctx, DnsGroup::External) + .await + .expect("fetching latest external DNS"); + assert_eq!(old_external.generation + 1, dns_latest_external.generation); + + // Specifically, there should be one new name (for the new Silo). + let diff = DnsDiff::new(&old_external, &dns_latest_external).unwrap(); + assert!(diff.names_removed().next().is_none()); + assert!(diff.names_changed().next().is_none()); + let added = diff.names_added().collect::>(); + assert_eq!(added.len(), 1); + let (new_name, new_records) = added[0]; + assert_eq!(new_name, silo_dns_name(&silo.identity.name)); + // And it should have the same IP addresses as all of the other Silos. + assert_eq!( + new_records, + old_external.zones[0].records.values().next().unwrap() + ); + + // If we execute the blueprint, DNS should not be changed. 
+ crate::realize_blueprint_with_overrides( + &opctx, + datastore, + &blueprint, + "test-suite", + &overrides, + ) + .await + .expect("failed to execute blueprint"); + let dns_latest_internal = datastore + .dns_config_read(&opctx, DnsGroup::Internal) + .await + .expect("fetching latest internal DNS"); + let dns_latest_external = datastore + .dns_config_read(&opctx, DnsGroup::External) + .await + .expect("fetching latest external DNS"); + assert_eq!(old_internal.generation, dns_latest_internal.generation); + assert_eq!(old_external.generation + 1, dns_latest_external.generation); + + dns_latest_external + } + + async fn verify_dns_unchanged( + opctx: &OpContext, + datastore: &DataStore, + old_internal: &DnsConfigParams, + old_external: &DnsConfigParams, + ) { + let dns_latest_internal = datastore + .dns_config_read(&opctx, DnsGroup::Internal) + .await + .expect("fetching latest internal DNS"); + let dns_latest_external = datastore + .dns_config_read(&opctx, DnsGroup::External) + .await + .expect("fetching latest external DNS"); + + assert_eq!(old_internal.generation, dns_latest_internal.generation); + assert_eq!(old_external.generation, dns_latest_external.generation); + } } diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 386e9dc1c8..8da4e7d44c 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -13,15 +13,19 @@ use nexus_types::deployment::Blueprint; use nexus_types::identity::Asset; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; +use overridables::Overridables; use slog::info; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::net::SocketAddrV6; use uuid::Uuid; +pub use dns::silo_dns_name; + mod datasets; mod dns; mod omicron_zones; +mod overridables; mod resource_allocation; struct Sled { @@ -57,6 +61,26 @@ pub async fn realize_blueprint( blueprint: &Blueprint, nexus_label: S, ) -> 
Result<(), Vec> +where + String: From, +{ + realize_blueprint_with_overrides( + opctx, + datastore, + blueprint, + nexus_label, + &Default::default(), + ) + .await +} + +pub async fn realize_blueprint_with_overrides( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, + nexus_label: S, + overrides: &Overridables, +) -> Result<(), Vec> where String: From, { @@ -123,6 +147,7 @@ where String::from(nexus_label), blueprint, &sleds_by_id, + &overrides, ) .await .map_err(|e| vec![anyhow!("{}", InlineErrorChain::new(&e))])?; diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index 336e40df16..6054a40f8a 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -108,6 +108,7 @@ mod test { zones_in_service: BTreeSet::new(), parent_blueprint_id: None, internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/reconfigurator/execution/src/overridables.rs b/nexus/reconfigurator/execution/src/overridables.rs new file mode 100644 index 0000000000..5c4ce7dc6f --- /dev/null +++ b/nexus/reconfigurator/execution/src/overridables.rs @@ -0,0 +1,121 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use omicron_common::address::get_switch_zone_address; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::MGD_PORT; +use omicron_common::address::MGS_PORT; +use omicron_common::address::SLED_PREFIX; +use std::collections::BTreeMap; +use std::net::Ipv6Addr; +use uuid::Uuid; + +/// Override values used during blueprint execution +/// +/// Blueprint execution assumes certain values about production systems that +/// differ in the simulated testing environment and cannot be easily derived +/// from anything else in the environment. To accommodate this, this structure +/// provides access to these values. Everywhere except the test suite, this +/// structure is empty and returns the default (production) values. The test +/// suite overrides these values. +#[derive(Debug, Default)] +pub struct Overridables { + /// map: sled id -> TCP port on which that sled's Dendrite is listening + pub dendrite_ports: BTreeMap, + /// map: sled id -> TCP port on which that sled's MGS is listening + pub mgs_ports: BTreeMap, + /// map: sled id -> TCP port on which that sled's MGD is listening + pub mgd_ports: BTreeMap, + /// map: sled id -> IP address of the sled's switch zone + pub switch_zone_ips: BTreeMap, +} + +impl Overridables { + /// Specify the TCP port on which this sled's Dendrite is listening + #[cfg(test)] + fn override_dendrite_port(&mut self, sled_id: Uuid, port: u16) { + self.dendrite_ports.insert(sled_id, port); + } + + /// Returns the TCP port on which this sled's Dendrite is listening + pub fn dendrite_port(&self, sled_id: Uuid) -> u16 { + self.dendrite_ports.get(&sled_id).copied().unwrap_or(DENDRITE_PORT) + } + + /// Specify the TCP port on which this sled's MGS is listening + #[cfg(test)] + fn override_mgs_port(&mut self, sled_id: Uuid, port: u16) { + self.mgs_ports.insert(sled_id, port); + } + + /// Returns the TCP port on which this sled's MGS is listening + pub fn mgs_port(&self, sled_id: Uuid) -> 
u16 { + self.mgs_ports.get(&sled_id).copied().unwrap_or(MGS_PORT) + } + + /// Specify the TCP port on which this sled's MGD is listening + #[cfg(test)] + fn override_mgd_port(&mut self, sled_id: Uuid, port: u16) { + self.mgd_ports.insert(sled_id, port); + } + + /// Returns the TCP port on which this sled's MGD is listening + pub fn mgd_port(&self, sled_id: Uuid) -> u16 { + self.mgd_ports.get(&sled_id).copied().unwrap_or(MGD_PORT) + } + + /// Specify the IP address of this switch zone + #[cfg(test)] + fn override_switch_zone_ip(&mut self, sled_id: Uuid, addr: Ipv6Addr) { + self.switch_zone_ips.insert(sled_id, addr); + } + + /// Returns the IP address of this sled's switch zone + pub fn switch_zone_ip( + &self, + sled_id: Uuid, + sled_subnet: Ipv6Subnet, + ) -> Ipv6Addr { + self.switch_zone_ips + .get(&sled_id) + .copied() + .unwrap_or_else(|| get_switch_zone_address(sled_subnet)) + } + + /// Generates a set of overrides describing the simulated test environment. + #[cfg(test)] + pub fn for_test( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, + ) -> Overridables { + use omicron_common::api::external::SwitchLocation; + + let mut overrides = Overridables::default(); + let scrimlets = [ + (nexus_test_utils::SLED_AGENT_UUID, SwitchLocation::Switch0), + (nexus_test_utils::SLED_AGENT2_UUID, SwitchLocation::Switch1), + ]; + for (id_str, switch_location) in scrimlets { + let sled_id = id_str.parse().unwrap(); + let ip = Ipv6Addr::LOCALHOST; + let mgs_port = cptestctx + .gateway + .get(&switch_location) + .unwrap() + .client + .bind_address + .port(); + let dendrite_port = + cptestctx.dendrite.get(&switch_location).unwrap().port; + let mgd_port = cptestctx.mgd.get(&switch_location).unwrap().port; + overrides.override_switch_zone_ip(sled_id, ip); + overrides.override_dendrite_port(sled_id, dendrite_port); + overrides.override_mgs_port(sled_id, mgs_port); + overrides.override_mgd_port(sled_id, mgd_port); + } + overrides + } +} diff --git 
a/nexus/reconfigurator/execution/src/resource_allocation.rs b/nexus/reconfigurator/execution/src/resource_allocation.rs index 83a484baa4..5872cee0f9 100644 --- a/nexus/reconfigurator/execution/src/resource_allocation.rs +++ b/nexus/reconfigurator/execution/src/resource_allocation.rs @@ -93,6 +93,14 @@ impl<'a> ResourceAllocator<'a> { external_ip: IpAddr, port_range: Option<(u16, u16)>, ) -> anyhow::Result { + // localhost is used by many components in the test suite. We can't use + // the normal path because normally a given external IP must only be + // used once. Just treat localhost in the test suite as though it's + // already allocated. We do the same in is_nic_already_allocated(). + if cfg!(test) && external_ip.is_loopback() { + return Ok(true); + } + let allocated_ips = self .datastore .service_lookup_external_ips(self.opctx, zone_id) @@ -157,6 +165,11 @@ impl<'a> ResourceAllocator<'a> { zone_id: Uuid, nic: &NetworkInterface, ) -> anyhow::Result { + // See the comment in is_external_ip_already_allocated(). 
+ if cfg!(test) && nic.ip.is_loopback() { + return Ok(true); + } + let allocated_nics = self .datastore .service_list_network_interfaces(self.opctx, zone_id) diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 52111bab94..3c4ba12ee4 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -27,4 +27,3 @@ omicron-workspace-hack.workspace = true [dev-dependencies] expectorate.workspace = true omicron-test-utils.workspace = true -sled-agent-client.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index 7a99343bf9..8b0d440d26 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -110,6 +110,7 @@ pub struct BlueprintBuilder<'a> { /// previous blueprint, on which this one will be based parent_blueprint: &'a Blueprint, internal_dns_version: Generation, + external_dns_version: Generation, // These fields are used to allocate resources from sleds. 
policy: &'a Policy, @@ -142,12 +143,14 @@ impl<'a> BlueprintBuilder<'a> { pub fn build_initial_from_collection( collection: &'a Collection, internal_dns_version: Generation, + external_dns_version: Generation, policy: &'a Policy, creator: &str, ) -> Result { Self::build_initial_impl( collection, internal_dns_version, + external_dns_version, policy, creator, BlueprintBuilderRng::new(), @@ -159,6 +162,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn build_initial_from_collection_seeded( collection: &'a Collection, internal_dns_version: Generation, + external_dns_version: Generation, policy: &'a Policy, creator: &str, seed: H, @@ -168,6 +172,7 @@ impl<'a> BlueprintBuilder<'a> { Self::build_initial_impl( collection, internal_dns_version, + external_dns_version, policy, creator, rng, @@ -177,6 +182,7 @@ impl<'a> BlueprintBuilder<'a> { fn build_initial_impl( collection: &'a Collection, internal_dns_version: Generation, + external_dns_version: Generation, policy: &'a Policy, creator: &str, mut rng: BlueprintBuilderRng, @@ -225,6 +231,7 @@ impl<'a> BlueprintBuilder<'a> { zones_in_service, parent_blueprint_id: None, internal_dns_version, + external_dns_version, time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("from collection {}", collection.id), @@ -237,6 +244,7 @@ impl<'a> BlueprintBuilder<'a> { log: &Logger, parent_blueprint: &'a Blueprint, internal_dns_version: Generation, + external_dns_version: Generation, policy: &'a Policy, creator: &str, ) -> anyhow::Result> { @@ -298,7 +306,12 @@ impl<'a> BlueprintBuilder<'a> { } } if let Some(external_ip) = z.zone_type.external_ip()? { - if !used_external_ips.insert(external_ip) { + // For the test suite, ignore localhost. It gets reused many + // times and that's okay. We don't expect to see localhost + // outside the test suite. 
+ if !external_ip.is_loopback() + && !used_external_ips.insert(external_ip) + { bail!("duplicate external IP: {external_ip}"); } } @@ -344,6 +357,7 @@ impl<'a> BlueprintBuilder<'a> { log, parent_blueprint, internal_dns_version, + external_dns_version, policy, sled_ip_allocators: BTreeMap::new(), zones: BlueprintZones::new(parent_blueprint), @@ -369,6 +383,7 @@ impl<'a> BlueprintBuilder<'a> { zones_in_service: self.zones_in_service, parent_blueprint_id: Some(self.parent_blueprint.id), internal_dns_version: self.internal_dns_version, + external_dns_version: self.external_dns_version, time_created: now_db_precision(), creator: self.creator, comment: self.comments.join(", "), @@ -754,8 +769,7 @@ impl UuidRng { } /// `extra` is a string that should be unique to the purpose of the UUIDs. - #[cfg(test)] - fn from_seed(seed: H, extra: &'static str) -> Self { + pub(crate) fn from_seed(seed: H, extra: &'static str) -> Self { let mut seeder = rand_seeder::Seeder::from((seed, extra)); Self { rng: seeder.make_rng::() } } @@ -872,145 +886,15 @@ impl<'a> BlueprintZones<'a> { #[cfg(test)] pub mod test { use super::*; + use crate::example::example; + use crate::example::ExampleSystem; use crate::system::SledBuilder; - use crate::system::SystemDescription; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; - use sled_agent_client::types::{ - OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, - }; + use sled_agent_client::types::{OmicronZoneConfig, OmicronZoneType}; pub const DEFAULT_N_SLEDS: usize = 3; - pub struct ExampleSystem { - pub system: SystemDescription, - pub policy: Policy, - pub collection: Collection, - pub blueprint: Blueprint, - // If we add more types of RNGs than just sleds here, we'll need to - // expand this to be similar to BlueprintBuilderRng where a root RNG - // creates sub-RNGs. 
- pub(crate) sled_rng: UuidRng, - } - - impl ExampleSystem { - pub fn new( - log: &slog::Logger, - test_name: &str, - nsleds: usize, - ) -> ExampleSystem { - let mut system = SystemDescription::new(); - let mut sled_rng = UuidRng::from_seed(test_name, "ExampleSystem"); - - let sled_ids: Vec<_> = - (0..nsleds).map(|_| sled_rng.next_uuid()).collect(); - for sled_id in &sled_ids { - let _ = system.sled(SledBuilder::new().id(*sled_id)).unwrap(); - } - - let policy = system.to_policy().expect("failed to make policy"); - let mut inventory_builder = system - .to_collection_builder() - .expect("failed to build collection"); - - // For each sled, have it report 0 zones in the initial inventory. - // This will enable us to build a blueprint from the initial - // inventory, which we can then use to build new blueprints. - for sled_id in &sled_ids { - inventory_builder - .found_sled_omicron_zones( - "fake sled agent", - *sled_id, - OmicronZonesConfig { - generation: Generation::new(), - zones: vec![], - }, - ) - .expect("recording Omicron zones"); - } - - let empty_zone_inventory = inventory_builder.build(); - let initial_blueprint = - BlueprintBuilder::build_initial_from_collection_seeded( - &empty_zone_inventory, - Generation::new(), - &policy, - "test suite", - (test_name, "ExampleSystem initial"), - ) - .unwrap(); - - // Now make a blueprint and collection with some zones on each sled. 
- let mut builder = BlueprintBuilder::new_based_on( - &log, - &initial_blueprint, - Generation::new(), - &policy, - "test suite", - ) - .unwrap(); - builder.set_rng_seed((test_name, "ExampleSystem make_zones")); - for (sled_id, sled_resources) in &policy.sleds { - let _ = builder.sled_ensure_zone_ntp(*sled_id).unwrap(); - let _ = builder - .sled_ensure_zone_multiple_nexus_with_config( - *sled_id, - 1, - false, - vec![], - ) - .unwrap(); - for pool_name in &sled_resources.zpools { - let _ = builder - .sled_ensure_zone_crucible(*sled_id, pool_name.clone()) - .unwrap(); - } - } - - let blueprint = builder.build(); - let mut builder = system - .to_collection_builder() - .expect("failed to build collection"); - - for sled_id in blueprint.sleds() { - let Some(zones) = blueprint.omicron_zones.get(&sled_id) else { - continue; - }; - builder - .found_sled_omicron_zones( - "fake sled agent", - sled_id, - zones.clone(), - ) - .unwrap(); - } - - ExampleSystem { - system, - policy, - collection: builder.build(), - blueprint, - sled_rng, - } - } - } - - /// Returns a collection and policy describing a pretty simple system. - /// - /// The test name is used as the RNG seed. - /// - /// `n_sleds` is the number of sleds supported. Currently, this value can - /// be anywhere between 0 and 5. (More can be added in the future if - /// necessary.) 
- pub fn example( - log: &slog::Logger, - test_name: &str, - nsleds: usize, - ) -> (Collection, Policy) { - let example = ExampleSystem::new(log, test_name, nsleds); - (example.collection, example.policy) - } - /// Checks various conditions that should be true for all blueprints pub fn verify_blueprint(blueprint: &Blueprint) { let mut underlay_ips: BTreeMap = @@ -1042,6 +926,7 @@ pub mod test { BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), + Generation::new(), &policy, "the_test", TEST_NAME, @@ -1067,6 +952,7 @@ pub mod test { &logctx.log, &blueprint_initial, Generation::new(), + Generation::new(), &policy, "test_basic", ) @@ -1098,6 +984,7 @@ pub mod test { &logctx.log, blueprint1, Generation::new(), + Generation::new(), &example.policy, "test_basic", ) @@ -1135,6 +1022,7 @@ pub mod test { &logctx.log, &blueprint2, Generation::new(), + Generation::new(), &policy, "test_basic", ) @@ -1215,8 +1103,9 @@ pub mod test { let (mut collection, policy) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - // We don't care about the internal DNS version here. + // We don't care about the DNS versions here. let internal_dns_version = Generation::new(); + let external_dns_version = Generation::new(); // Adding a new Nexus zone currently requires copying settings from an // existing Nexus zone. If we remove all Nexus zones from the @@ -1231,6 +1120,7 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection_seeded( &collection, internal_dns_version, + external_dns_version, &policy, "test", TEST_NAME, @@ -1241,6 +1131,7 @@ pub mod test { &logctx.log, &parent, internal_dns_version, + external_dns_version, &policy, "test", ) @@ -1273,8 +1164,9 @@ pub mod test { let (mut collection, policy) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - // We don't care about the internal DNS version here. + // We don't care about the DNS versions here. 
let internal_dns_version = Generation::new(); + let external_dns_version = Generation::new(); // Remove the Nexus zone from one of the sleds so that // `sled_ensure_zone_nexus` can attempt to add a Nexus zone to @@ -1297,6 +1189,7 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), + Generation::new(), &policy, "test", TEST_NAME, @@ -1310,6 +1203,7 @@ pub mod test { &logctx.log, &parent, internal_dns_version, + external_dns_version, &policy, "test", ) @@ -1329,6 +1223,7 @@ pub mod test { &logctx.log, &parent, internal_dns_version, + external_dns_version, &policy, "test", ) @@ -1362,6 +1257,7 @@ pub mod test { &logctx.log, &parent, internal_dns_version, + external_dns_version, &policy, "test", ) @@ -1424,6 +1320,7 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), + Generation::new(), &policy, "test", TEST_NAME, @@ -1434,6 +1331,7 @@ pub mod test { &logctx.log, &parent, Generation::new(), + Generation::new(), &policy, "test", ) { @@ -1482,6 +1380,7 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), + Generation::new(), &policy, "test", TEST_NAME, @@ -1492,6 +1391,7 @@ pub mod test { &logctx.log, &parent, Generation::new(), + Generation::new(), &policy, "test", ) { @@ -1540,6 +1440,7 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), + Generation::new(), &policy, "test", TEST_NAME, @@ -1550,6 +1451,7 @@ pub mod test { &logctx.log, &parent, Generation::new(), + Generation::new(), &policy, "test", ) { diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs new file mode 100644 index 0000000000..c8c1bb380b --- /dev/null +++ b/nexus/reconfigurator/planning/src/example.rs @@ -0,0 +1,147 @@ +// This Source Code Form is subject to the terms of the Mozilla 
Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Example blueprints + +use crate::blueprint_builder::BlueprintBuilder; +use crate::blueprint_builder::UuidRng; +use crate::system::SledBuilder; +use crate::system::SystemDescription; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::Policy; +use nexus_types::inventory::Collection; +use omicron_common::api::external::Generation; +use sled_agent_client::types::OmicronZonesConfig; + +pub struct ExampleSystem { + pub system: SystemDescription, + pub policy: Policy, + pub collection: Collection, + pub blueprint: Blueprint, + // If we add more types of RNGs than just sleds here, we'll need to + // expand this to be similar to BlueprintBuilderRng where a root RNG + // creates sub-RNGs. + // + // This is currently only used for tests, so it looks unused in normal + // builds. But in the future it could be used by other consumers, too. + #[allow(dead_code)] + pub(crate) sled_rng: UuidRng, +} + +impl ExampleSystem { + pub fn new( + log: &slog::Logger, + test_name: &str, + nsleds: usize, + ) -> ExampleSystem { + let mut system = SystemDescription::new(); + let mut sled_rng = UuidRng::from_seed(test_name, "ExampleSystem"); + let sled_ids: Vec<_> = + (0..nsleds).map(|_| sled_rng.next_uuid()).collect(); + for sled_id in &sled_ids { + let _ = system.sled(SledBuilder::new().id(*sled_id)).unwrap(); + } + + let policy = system.to_policy().expect("failed to make policy"); + let mut inventory_builder = + system.to_collection_builder().expect("failed to build collection"); + + // For each sled, have it report 0 zones in the initial inventory. + // This will enable us to build a blueprint from the initial + // inventory, which we can then use to build new blueprints. 
+ for sled_id in &sled_ids { + inventory_builder + .found_sled_omicron_zones( + "fake sled agent", + *sled_id, + OmicronZonesConfig { + generation: Generation::new(), + zones: vec![], + }, + ) + .expect("recording Omicron zones"); + } + + let empty_zone_inventory = inventory_builder.build(); + let initial_blueprint = + BlueprintBuilder::build_initial_from_collection_seeded( + &empty_zone_inventory, + Generation::new(), + Generation::new(), + &policy, + "test suite", + (test_name, "ExampleSystem initial"), + ) + .unwrap(); + + // Now make a blueprint and collection with some zones on each sled. + let mut builder = BlueprintBuilder::new_based_on( + &log, + &initial_blueprint, + Generation::new(), + Generation::new(), + &policy, + "test suite", + ) + .unwrap(); + builder.set_rng_seed((test_name, "ExampleSystem make_zones")); + for (sled_id, sled_resources) in &policy.sleds { + let _ = builder.sled_ensure_zone_ntp(*sled_id).unwrap(); + let _ = builder + .sled_ensure_zone_multiple_nexus_with_config( + *sled_id, + 1, + false, + vec![], + ) + .unwrap(); + for pool_name in &sled_resources.zpools { + let _ = builder + .sled_ensure_zone_crucible(*sled_id, pool_name.clone()) + .unwrap(); + } + } + + let blueprint = builder.build(); + let mut builder = + system.to_collection_builder().expect("failed to build collection"); + + for sled_id in blueprint.sleds() { + let Some(zones) = blueprint.omicron_zones.get(&sled_id) else { + continue; + }; + builder + .found_sled_omicron_zones( + "fake sled agent", + sled_id, + zones.clone(), + ) + .unwrap(); + } + + ExampleSystem { + system, + policy, + collection: builder.build(), + blueprint, + sled_rng, + } + } +} + +/// Returns a collection and policy describing a pretty simple system. +/// +/// The test name is used as the RNG seed. +/// +/// `n_sleds` is the number of sleds supported. Currently, this value can +/// be anywhere between 0 and 5. (More can be added in the future if +/// necessary.) 
+pub fn example( + log: &slog::Logger, + test_name: &str, + nsleds: usize, +) -> (Collection, Policy) { + let example = ExampleSystem::new(log, test_name, nsleds); + (example.collection, example.policy) +} diff --git a/nexus/reconfigurator/planning/src/lib.rs b/nexus/reconfigurator/planning/src/lib.rs index e0a61826f0..3d6cc6a778 100644 --- a/nexus/reconfigurator/planning/src/lib.rs +++ b/nexus/reconfigurator/planning/src/lib.rs @@ -116,6 +116,7 @@ //! updates, etc. pub mod blueprint_builder; +pub mod example; mod ip_allocator; pub mod planner; pub mod system; diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 1882d62e22..1f79fda222 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -42,6 +42,7 @@ impl<'a> Planner<'a> { log: Logger, parent_blueprint: &'a Blueprint, internal_dns_version: Generation, + external_dns_version: Generation, policy: &'a Policy, creator: &str, // NOTE: Right now, we just assume that this is the latest inventory @@ -52,6 +53,7 @@ impl<'a> Planner<'a> { &log, parent_blueprint, internal_dns_version, + external_dns_version, policy, creator, )?; @@ -330,11 +332,11 @@ impl<'a> Planner<'a> { #[cfg(test)] mod test { use super::Planner; - use crate::blueprint_builder::test::example; use crate::blueprint_builder::test::verify_blueprint; - use crate::blueprint_builder::test::ExampleSystem; use crate::blueprint_builder::test::DEFAULT_N_SLEDS; use crate::blueprint_builder::BlueprintBuilder; + use crate::example::example; + use crate::example::ExampleSystem; use crate::system::SledBuilder; use expectorate::assert_contents; use nexus_inventory::now_db_precision; @@ -352,8 +354,9 @@ mod test { static TEST_NAME: &str = "planner_basic_add_sled"; let logctx = test_setup_log(TEST_NAME); - // For our purposes, we don't care about the internal DNS generation. + // For our purposes, we don't care about the DNS generations. 
let internal_dns_version = Generation::new(); + let external_dns_version = Generation::new(); // Use our example inventory collection. let mut example = @@ -365,6 +368,7 @@ mod test { BlueprintBuilder::build_initial_from_collection_seeded( &example.collection, internal_dns_version, + external_dns_version, &example.policy, "the_test", (TEST_NAME, "bp1"), @@ -379,6 +383,7 @@ mod test { logctx.log.clone(), &blueprint1, internal_dns_version, + external_dns_version, &example.policy, "no-op?", &example.collection, @@ -406,6 +411,7 @@ mod test { logctx.log.clone(), &blueprint2, internal_dns_version, + external_dns_version, &policy, "test: add NTP?", &example.collection, @@ -447,6 +453,7 @@ mod test { logctx.log.clone(), &blueprint3, internal_dns_version, + external_dns_version, &policy, "test: add nothing more", &example.collection, @@ -486,6 +493,7 @@ mod test { logctx.log.clone(), &blueprint3, internal_dns_version, + external_dns_version, &policy, "test: add Crucible zones?", &collection, @@ -527,6 +535,7 @@ mod test { logctx.log.clone(), &blueprint5, internal_dns_version, + external_dns_version, &policy, "test: no-op?", &collection, @@ -553,8 +562,9 @@ mod test { static TEST_NAME: &str = "planner_add_multiple_nexus_to_one_sled"; let logctx = test_setup_log(TEST_NAME); - // For our purposes, we don't care about the internal DNS generation. + // For our purposes, we don't care about the DNS generations. let internal_dns_version = Generation::new(); + let external_dns_version = Generation::new(); // Use our example inventory collection as a starting point, but strip // it down to just one sled. 
@@ -580,6 +590,7 @@ mod test { BlueprintBuilder::build_initial_from_collection_seeded( &collection, internal_dns_version, + external_dns_version, &policy, "the_test", (TEST_NAME, "bp1"), @@ -608,6 +619,7 @@ mod test { logctx.log.clone(), &blueprint1, internal_dns_version, + external_dns_version, &policy, "add more Nexus", &collection, @@ -655,6 +667,7 @@ mod test { BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), + Generation::new(), &policy, "the_test", (TEST_NAME, "bp1"), @@ -680,6 +693,7 @@ mod test { logctx.log.clone(), &blueprint1, Generation::new(), + Generation::new(), &policy, "add more Nexus", &collection, @@ -744,6 +758,7 @@ mod test { BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), + Generation::new(), &policy, "the_test", (TEST_NAME, "bp1"), @@ -800,6 +815,7 @@ mod test { logctx.log.clone(), &blueprint1, Generation::new(), + Generation::new(), &policy, "add more Nexus", &collection, diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 7024f8be75..c65a49ec24 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -132,7 +132,7 @@ mod test { fn create_blueprint( omicron_zones: BTreeMap, - internal_dns_version: Generation, + dns_version: Generation, ) -> (BlueprintTarget, Blueprint) { let id = Uuid::new_v4(); ( @@ -146,7 +146,8 @@ mod test { omicron_zones, zones_in_service: BTreeSet::new(), parent_blueprint_id: None, - internal_dns_version, + internal_dns_version: dns_version, + external_dns_version: dns_version, time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/blueprint_load.rs index 603bc6b7d2..b593354099 100644 --- a/nexus/src/app/background/blueprint_load.rs +++ b/nexus/src/app/background/blueprint_load.rs @@ -232,6 
+232,7 @@ mod test { zones_in_service: BTreeSet::new(), parent_blueprint_id, internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), time_created: now_db_precision(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 4f66f2af78..48ed844f12 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -35,6 +35,7 @@ struct PlanningContext { creator: String, inventory: Option, internal_dns_version: Generation, + external_dns_version: Generation, } impl super::Nexus { @@ -168,21 +169,28 @@ impl super::Nexus { "fetching latest inventory collection for blueprint planner", )?; - // Fetch the current internal DNS version. This could be made part of + // Fetch the current DNS versions. This could be made part of // inventory, but it's enough of a one-off that there's no particular // advantage to doing that work now. - let dns_version = datastore + let internal_dns_version = datastore .dns_group_latest_version(opctx, DnsGroup::Internal) .await .internal_context( "fetching internal DNS version for blueprint planning", )?; + let external_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::External) + .await + .internal_context( + "fetching external DNS version for blueprint planning", + )?; Ok(PlanningContext { creator, policy, inventory, - internal_dns_version: *dns_version.version, + internal_dns_version: *internal_dns_version.version, + external_dns_version: *external_dns_version.version, }) } @@ -207,6 +215,7 @@ impl super::Nexus { let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, planning_context.internal_dns_version, + planning_context.external_dns_version, &planning_context.policy, &planning_context.creator, ) @@ -242,6 +251,7 @@ impl super::Nexus { opctx.log.clone(), &parent_blueprint, planning_context.internal_dns_version, + planning_context.external_dns_version, 
&planning_context.policy, &planning_context.creator, &inventory, diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index bcfec667ce..25a9dd4e6c 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -26,7 +26,6 @@ //! "certificate resolver" object that impls //! [`rustls::server::ResolvesServerCert`]. See [`NexusCertResolver`]. -use super::silo::silo_dns_name; use crate::ServerContext; use anyhow::anyhow; use anyhow::bail; @@ -39,6 +38,7 @@ use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::fixed_data::silo::SILO_ID; use nexus_db_queries::db::model::ServiceKind; use nexus_db_queries::db::DataStore; +use nexus_reconfigurator_execution::silo_dns_name; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::DataPageParams; diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 4030fce31d..4ba31bb0fe 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -4,7 +4,6 @@ //! 
Rack management -use super::silo::silo_dns_name; use crate::external_api::params; use crate::external_api::params::CertificateCreate; use crate::external_api::shared::ServiceUsingCertificate; @@ -20,6 +19,7 @@ use nexus_db_queries::db; use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; use nexus_db_queries::db::datastore::RackInit; use nexus_db_queries::db::lookup::LookupPath; +use nexus_reconfigurator_execution::silo_dns_name; use nexus_types::external_api::params::Address; use nexus_types::external_api::params::AddressConfig; use nexus_types::external_api::params::AddressLotBlockCreate; diff --git a/nexus/src/app/silo.rs b/nexus/src/app/silo.rs index 8461be015a..487af96aab 100644 --- a/nexus/src/app/silo.rs +++ b/nexus/src/app/silo.rs @@ -16,6 +16,7 @@ use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::{self, lookup}; use nexus_db_queries::{authn, authz}; +use nexus_reconfigurator_execution::silo_dns_name; use nexus_types::internal_api::params::DnsRecord; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::ListResultVec; @@ -95,8 +96,13 @@ impl super::Nexus { // Set up an external DNS name for this Silo's API and console // endpoints (which are the same endpoint). 
+ let target_blueprint = datastore + .blueprint_target_get_current_full(opctx) + .await + .internal_context("loading target blueprint")?; + let target = target_blueprint.as_ref().map(|(_, blueprint)| blueprint); let (nexus_external_ips, nexus_external_dns_zones) = - datastore.nexus_external_addresses(nexus_opctx).await?; + datastore.nexus_external_addresses(nexus_opctx, target).await?; let dns_records: Vec = nexus_external_ips .into_iter() .map(|addr| match addr { @@ -886,16 +892,3 @@ impl super::Nexus { LookupPath::new(opctx, &self.db_datastore).silo_group_id(*group_id) } } - -/// Returns the (relative) DNS name for this Silo's API and console endpoints -/// _within_ the external DNS zone (i.e., without that zone's suffix) -/// -/// This specific naming scheme is determined under RFD 357. -pub(crate) fn silo_dns_name( - name: &omicron_common::api::external::Name, -) -> String { - // RFD 4 constrains resource names (including Silo names) to DNS-safe - // strings, which is why it's safe to directly put the name of the - // resource into the DNS name rather than doing any kind of escaping. - format!("{}.sys", name) -} diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 7054a3ea15..a6b53272eb 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -151,9 +151,13 @@ pub struct Blueprint { pub parent_blueprint_id: Option, /// internal DNS version when this blueprint was created - // See blueprint generation for more on this. + // See blueprint execution for more on this. pub internal_dns_version: Generation, + /// external DNS version when this blueprint was created + // See blueprint execution for more on this. 
+ pub external_dns_version: Generation, + /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, /// identity of the component that generated the blueprint (for debugging) @@ -305,6 +309,8 @@ pub struct BlueprintMetadata { pub parent_blueprint_id: Option, /// internal DNS version when this blueprint was created pub internal_dns_version: Generation, + /// external DNS version when this blueprint was created + pub external_dns_version: Generation, /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index c67fe27652..09b5f1e5ab 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2341,6 +2341,14 @@ "description": "identity of the component that generated the blueprint (for debugging) This would generally be the Uuid of a Nexus instance.", "type": "string" }, + "external_dns_version": { + "description": "external DNS version when this blueprint was created", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, "id": { "description": "unique identifier for this blueprint", "type": "string", @@ -2385,6 +2393,7 @@ "required": [ "comment", "creator", + "external_dns_version", "id", "internal_dns_version", "omicron_zones", @@ -2404,6 +2413,14 @@ "description": "identity of the component that generated the blueprint (for debugging) This would generally be the Uuid of a Nexus instance.", "type": "string" }, + "external_dns_version": { + "description": "external DNS version when this blueprint was created", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, "id": { "description": "unique identifier for this blueprint", "type": "string", @@ -2432,6 +2449,7 @@ "required": [ "comment", "creator", + "external_dns_version", "id", "internal_dns_version", "time_created" diff --git a/schema/crdb/42.0.0/up1.sql b/schema/crdb/42.0.0/up1.sql new file mode 100644 index 
0000000000..e8ae49c7c8 --- /dev/null +++ b/schema/crdb/42.0.0/up1.sql @@ -0,0 +1,8 @@ +-- Add the "external_dns_version" column to the "blueprint" table. +-- This query will end up setting the external DNS version for any existing +-- blueprints to 1. This is always safe because it's the smallest possible +-- value and if a value is too small, the end result is simply needing to +-- regenerate the blueprint in order to be able to execute it. (On the other +-- hand, using a value that's too large could cause corruption.) +ALTER TABLE omicron.public.blueprint + ADD COLUMN IF NOT EXISTS external_dns_version INT8 NOT NULL DEFAULT 1; diff --git a/schema/crdb/42.0.0/up2.sql b/schema/crdb/42.0.0/up2.sql new file mode 100644 index 0000000000..cc89117e1d --- /dev/null +++ b/schema/crdb/42.0.0/up2.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.blueprint + ALTER COLUMN external_dns_version DROP DEFAULT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 7c3b8fcc34..9895464fb5 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3159,7 +3159,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( comment TEXT NOT NULL, -- identifies the latest internal DNS version when blueprint planning began - internal_dns_version INT8 NOT NULL + internal_dns_version INT8 NOT NULL, + -- identifies the latest external DNS version when blueprint planning began + external_dns_version INT8 NOT NULL ); -- table describing both the current and historical target blueprints of the @@ -3584,7 +3586,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '41.0.0', NULL) + ( TRUE, NOW(), NOW(), '42.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;