Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update internal DNS during blueprint execution #4989

Merged
merged 19 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion clients/dns-service-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
progenitor::generate_api!(
spec = "../../openapi/dns-server.json",
inner_type = slog::Logger,
derives = [schemars::JsonSchema, Eq, PartialEq],
derives = [schemars::JsonSchema, Clone, Eq, PartialEq],
pre_hook = (|log: &slog::Logger, request: &reqwest::Request| {
slog::debug!(log, "client request";
"method" => %request.method(),
Expand Down
6 changes: 6 additions & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,12 @@ impl From<&Generation> for i64 {
}
}

impl From<Generation> for u64 {
fn from(g: Generation) -> Self {
g.0
}
}

impl From<u32> for Generation {
fn from(value: u32) -> Self {
Generation(u64::from(value))
Expand Down
2 changes: 1 addition & 1 deletion dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,7 @@ async fn cmd_nexus_blueprints_diff(
let b2 = client.blueprint_view(&args.blueprint2_id).await.with_context(
|| format!("fetching blueprint {}", args.blueprint2_id),
)?;
println!("{}", b1.diff(&b2));
println!("{}", b1.diff_sleds(&b2));
Ok(())
}

Expand Down
102 changes: 62 additions & 40 deletions internal-dns/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
use crate::names::{ServiceName, DNS_ZONE};
use anyhow::{anyhow, ensure};
use dns_service_client::types::{DnsConfigParams, DnsConfigZone, DnsRecord};
use omicron_common::api::internal::shared::SwitchLocation;
use omicron_common::api::external::Generation;
use std::collections::BTreeMap;
use std::net::{Ipv6Addr, SocketAddrV6};
use std::net::Ipv6Addr;
use uuid::Uuid;

/// Zones that can be referenced within the internal DNS system.
Expand Down Expand Up @@ -147,8 +147,6 @@ pub struct DnsConfigBuilder {
/// network
sleds: BTreeMap<Sled, Ipv6Addr>,

scrimlets: BTreeMap<SwitchLocation, SocketAddrV6>,

/// set of hosts of type "zone" that have been configured so far, mapping
/// each zone's unique uuid to its sole IPv6 address on the control plane
/// network
Expand All @@ -163,6 +161,9 @@ pub struct DnsConfigBuilder {

/// similar to service_instances_zones, but for services that run on sleds
service_instances_sleds: BTreeMap<ServiceName, BTreeMap<Sled, u16>>,

/// generation number for this config
generation: Generation,
}

/// Describes a host of type "sled" in the control plane DNS zone
Expand Down Expand Up @@ -192,16 +193,17 @@ impl DnsConfigBuilder {
DnsConfigBuilder {
sleds: BTreeMap::new(),
zones: BTreeMap::new(),
scrimlets: BTreeMap::new(),
service_instances_zones: BTreeMap::new(),
service_instances_sleds: BTreeMap::new(),
generation: Generation::new(),
}
}

/// Add a new host of type "sled" to the configuration
///
/// Returns a [`Sled`] that can be used with [`Self::service_backend_sled()`] to
/// specify that this sled is a backend for some higher-level service.
/// Returns a [`Sled`] that can be used with
/// [`Self::service_backend_sled()`] to specify that this sled is a backend
/// for some higher-level service.
///
/// # Errors
///
Expand All @@ -223,19 +225,11 @@ impl DnsConfigBuilder {
}
}

pub fn host_scrimlet(
&mut self,
switch_location: SwitchLocation,
addr: SocketAddrV6,
) -> anyhow::Result<()> {
self.scrimlets.insert(switch_location, addr);
Ok(())
}

/// Add a new dendrite host of type "zone" to the configuration
///
/// Returns a [`Zone`] that can be used with [`Self::service_backend_zone()`] to
/// specify that this zone is a backend for some higher-level service.
/// Returns a [`Zone`] that can be used with
/// [`Self::service_backend_zone()`] to specify that this zone is a backend
/// for some higher-level service.
///
/// # Errors
///
Expand All @@ -251,8 +245,9 @@ impl DnsConfigBuilder {

/// Add a new host of type "zone" to the configuration
///
/// Returns a [`Zone`] that can be used with [`Self::service_backend_zone()`] to
/// specify that this zone is a backend for some higher-level service.
/// Returns a [`Zone`] that can be used with
/// [`Self::service_backend_zone()`] to specify that this zone is a backend
/// for some higher-level service.
///
/// # Errors
///
Expand Down Expand Up @@ -363,6 +358,52 @@ impl DnsConfigBuilder {
}
}

/// Higher-level shorthand for adding a zone with a single backend service
///
/// # Errors
///
/// This function fails only if the given zone has already been added to the
/// configuration.
pub fn host_zone_with_one_backend(
&mut self,
zone_id: Uuid,
addr: Ipv6Addr,
service: ServiceName,
port: u16,
) -> anyhow::Result<()> {
let zone = self.host_zone(zone_id, addr)?;
self.service_backend_zone(service, &zone, port)
}

/// Higher-level shorthand for adding a "switch" zone with its usual set of
/// backend services
///
/// # Errors
///
/// This function fails only if the given zone has already been added to the
/// configuration.
pub fn host_zone_switch(
&mut self,
sled_id: Uuid,
switch_zone_ip: Ipv6Addr,
dendrite_port: u16,
mgs_port: u16,
mgd_port: u16,
) -> anyhow::Result<()> {
let zone = self.host_dendrite(sled_id, switch_zone_ip)?;
self.service_backend_zone(ServiceName::Dendrite, &zone, dendrite_port)?;
self.service_backend_zone(
ServiceName::ManagementGatewayService,
&zone,
mgs_port,
)?;
self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port)
}

pub fn generation(&mut self, generation: Generation) {
self.generation = generation;
}

/// Construct a complete [`DnsConfigParams`] (suitable for propagating to
/// our DNS servers) for the control plane DNS zone described up to this
/// point
Expand All @@ -378,23 +419,6 @@ impl DnsConfigBuilder {
(zone.dns_name(), vec![DnsRecord::Aaaa(zone_ip)])
});

let scrimlet_srv_records =
self.scrimlets.clone().into_iter().map(|(location, addr)| {
let srv = DnsRecord::Srv(dns_service_client::types::Srv {
prio: 0,
weight: 0,
port: addr.port(),
target: format!("{location}.scrimlet.{}", DNS_ZONE),
});
(ServiceName::Scrimlet(location).dns_name(), vec![srv])
});

let scrimlet_aaaa_records =
self.scrimlets.into_iter().map(|(location, addr)| {
let aaaa = DnsRecord::Aaaa(*addr.ip());
(format!("{location}.scrimlet"), vec![aaaa])
});

Comment on lines -381 to -397
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tl;dr: I brought this up in oxide-control-plane chat and sync'd with @rcgoodfellow and @internet-diglett and I believe these DNS names can be removed.

More details:

  • I could not find any references to these DNS names anywhere in Omicron. I looked for "switch0.scrimlet" and "switch1.scrimlet" and even ".scrimlet" (in case it was being filled into a template). If they were used programmatically, they'd show up as ServiceName::Scrimlet. The only place that's used is a comment explaining some context.
  • What I got from chat is what that comment says: these DNS names were added at some point after dogfood was set up. It was then discovered that they couldn't be used for whatever they were going to be used for because these DNS names only get set up at RSS-time, so systems like dogfood were just never going to have them. Instead, a different mechanism was created to find these zones when needed.
  • From what I can tell, the RSS code that assigned these DNS names chose the switch location (switch0 vs. switch1) arbitrarily, based on which one's sled id came lexicographically first. That's potentially inconsistent with what MGS reported as switch0 and switch1. So these names were potentially wrong on any systems where they were set up.
  • It should be possible to augment the code in this PR to reliably establish correct DNS names for these, provided we have an inventory collection with the information that we need from MGS. This way, in the future, we can use DNS for this. I didn't add this to this PR.

// Assemble the set of SRV records, which implicitly point back at
// zones' AAAA records.
let srv_records_zones = self.service_instances_zones.into_iter().map(
Expand Down Expand Up @@ -439,12 +463,10 @@ impl DnsConfigBuilder {
.chain(zone_records)
.chain(srv_records_sleds)
.chain(srv_records_zones)
.chain(scrimlet_aaaa_records)
.chain(scrimlet_srv_records)
.collect();

DnsConfigParams {
generation: 1,
generation: u64::from(self.generation),
time_created: chrono::Utc::now(),
zones: vec![DnsConfigZone {
zone_name: DNS_ZONE.to_owned(),
Expand Down
6 changes: 4 additions & 2 deletions nexus/blueprint-execution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@ omicron-rpaths.workspace = true
[dependencies]
anyhow.workspace = true
futures.workspace = true
internal-dns.workspace = true
nexus-db-model.workspace = true
nexus-db-queries.workspace = true
nexus-types.workspace = true
omicron-common.workspace = true
reqwest.workspace = true
sled-agent-client.workspace = true
slog.workspace = true
slog-error-chain.workspace = true
uuid.workspace = true

# See omicron-rpaths for more about the "pq-sys" dependency. This is needed
Expand All @@ -26,9 +30,7 @@ omicron-workspace-hack.workspace = true
[dev-dependencies]
chrono.workspace = true
httptest.workspace = true
nexus-db-model.workspace = true
nexus-test-utils.workspace = true
nexus-test-utils-macros.workspace = true
omicron-common.workspace = true
omicron-nexus.workspace = true
tokio.workspace = true
Loading
Loading