Skip to content

Commit

Permalink
Discretionary internal DNS zones (#6425)
Browse files Browse the repository at this point in the history
Will plan for `INTERNAL_DNS_REDUNDANCY = 3` internal DNS servers
by default (note the rename from `DNS_REDUNDANCY`),
accepting at most `MAX_INTERNAL_DNS_REDUNDANCY = 5`
as the target. Packs allocations of subnets of the reserved rack
subnet for DNS server addresses. The main planning test is
`test_spread_internal_dns_zones_across_sleds()`.

Fixes #6241.
  • Loading branch information
plotnick authored and hawkw committed Aug 31, 2024
1 parent 01454f5 commit e39ff8b
Show file tree
Hide file tree
Showing 24 changed files with 914 additions and 469 deletions.
66 changes: 52 additions & 14 deletions common/src/address.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! and Nexus, who need to agree upon addressing schemes.
use crate::api::external::{self, Error};
use crate::policy::{DNS_REDUNDANCY, MAX_DNS_REDUNDANCY};
use crate::policy::{INTERNAL_DNS_REDUNDANCY, MAX_INTERNAL_DNS_REDUNDANCY};
use ipnetwork::Ipv6Network;
use once_cell::sync::Lazy;
use oxnet::{Ipv4Net, Ipv6Net};
Expand Down Expand Up @@ -175,7 +175,18 @@ pub const CP_SERVICES_RESERVED_ADDRESSES: u16 = 0xFFFF;
pub const SLED_RESERVED_ADDRESSES: u16 = 32;

/// Wraps an [`Ipv6Net`] with a compile-time prefix length.
#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Hash, PartialEq, Eq)]
#[derive(
Debug,
Clone,
Copy,
JsonSchema,
Serialize,
Hash,
PartialEq,
Eq,
PartialOrd,
Ord,
)]
#[schemars(rename = "Ipv6Subnet")]
pub struct Ipv6Subnet<const N: u8> {
net: Ipv6Net,
Expand Down Expand Up @@ -229,12 +240,33 @@ impl<'de, const N: u8> Deserialize<'de> for Ipv6Subnet<N> {
}

/// Represents a subnet which may be used for contacting DNS services.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
#[derive(
Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord,
)]
pub struct DnsSubnet {
subnet: Ipv6Subnet<SLED_PREFIX>,
}

impl DnsSubnet {
/// Constructs a `DnsSubnet` wrapping the given sled-prefix (`/64`) subnet.
pub fn new(subnet: Ipv6Subnet<SLED_PREFIX>) -> Self {
Self { subnet }
}

/// Makes a new DNS subnet from the high-order bits of an address.
pub fn from_addr(addr: Ipv6Addr) -> Self {
Self::new(Ipv6Subnet::new(addr))
}

/// Returns the DNS subnet.
pub fn subnet(&self) -> Ipv6Subnet<SLED_PREFIX> {
self.subnet
}

/// Returns the reserved rack subnet that contains this DNS subnet.
pub fn rack_subnet(&self) -> ReservedRackSubnet {
ReservedRackSubnet::from_subnet(self.subnet)
}

/// Returns the DNS server address within the subnet.
///
/// This is the first address within the subnet.
Expand All @@ -253,7 +285,7 @@ impl DnsSubnet {

/// A wrapper around an IPv6 network, indicating it is a "reserved" rack
/// subnet which can be used for AZ-wide services.
#[derive(Debug, Clone)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ReservedRackSubnet(pub Ipv6Subnet<RACK_PREFIX>);

impl ReservedRackSubnet {
Expand All @@ -262,17 +294,23 @@ impl ReservedRackSubnet {
ReservedRackSubnet(Ipv6Subnet::<RACK_PREFIX>::new(subnet.net().addr()))
}

/// Infer the reserved rack subnet from a sled/AZ/DNS subnet.
pub fn from_subnet<const N: u8>(subnet: Ipv6Subnet<N>) -> Self {
// Re-anchor the address at the AZ prefix first; `new` then narrows
// it to the rack prefix, so any subnet within the same AZ maps to
// the same reserved rack subnet regardless of its own prefix `N`.
Self::new(Ipv6Subnet::<AZ_PREFIX>::new(subnet.net().addr()))
}

/// Returns the `index`th DNS subnet from this reserved rack subnet.
///
/// NOTE(review): the only caller visible here (`get_dns_subnets`) passes
/// 1-based indices (`idx + 1`) — confirm whether index 0 is deliberately
/// reserved, or whether a 0 argument is valid.
pub fn get_dns_subnet(&self, index: u8) -> DnsSubnet {
DnsSubnet::new(get_64_subnet(self.0, index))
}

/// Returns the DNS addresses from this reserved rack subnet.
///
/// These addresses will come from the first [`MAX_DNS_REDUNDANCY`] `/64s` of the
/// [`RACK_PREFIX`] subnet.
/// These addresses will come from the first [`MAX_INTERNAL_DNS_REDUNDANCY`]
/// `/64s` of the [`RACK_PREFIX`] subnet.
pub fn get_dns_subnets(&self) -> Vec<DnsSubnet> {
(0..MAX_DNS_REDUNDANCY)
.map(|idx| {
let subnet =
get_64_subnet(self.0, u8::try_from(idx + 1).unwrap());
DnsSubnet { subnet }
})
(0..MAX_INTERNAL_DNS_REDUNDANCY)
.map(|idx| self.get_dns_subnet(u8::try_from(idx + 1).unwrap()))
.collect()
}
}
Expand All @@ -283,7 +321,7 @@ pub fn get_internal_dns_server_addresses(addr: Ipv6Addr) -> Vec<IpAddr> {
let az_subnet = Ipv6Subnet::<AZ_PREFIX>::new(addr);
let reserved_rack_subnet = ReservedRackSubnet::new(az_subnet);
let dns_subnets =
&reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY];
&reserved_rack_subnet.get_dns_subnets()[0..INTERNAL_DNS_REDUNDANCY];
dns_subnets
.iter()
.map(|dns_subnet| IpAddr::from(dns_subnet.dns_address()))
Expand Down Expand Up @@ -664,7 +702,7 @@ mod test {

// Observe the first DNS subnet within this reserved rack subnet.
let dns_subnets = rack_subnet.get_dns_subnets();
assert_eq!(MAX_DNS_REDUNDANCY, dns_subnets.len());
assert_eq!(MAX_INTERNAL_DNS_REDUNDANCY, dns_subnets.len());

// The DNS address and GZ address should be only differing by one.
assert_eq!(
Expand Down
10 changes: 5 additions & 5 deletions common/src/policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ pub const COCKROACHDB_REDUNDANCY: usize = 5;

/// The amount of redundancy for internal DNS servers.
///
/// Must be less than or equal to MAX_DNS_REDUNDANCY.
pub const DNS_REDUNDANCY: usize = 3;
/// Must be less than or equal to MAX_INTERNAL_DNS_REDUNDANCY.
pub const INTERNAL_DNS_REDUNDANCY: usize = 3;

/// The maximum amount of redundancy for DNS servers.
/// The maximum amount of redundancy for internal DNS servers.
///
/// This determines the number of addresses which are reserved for DNS servers.
pub const MAX_DNS_REDUNDANCY: usize = 5;
/// This determines the number of addresses which are reserved for internal DNS servers.
pub const MAX_INTERNAL_DNS_REDUNDANCY: usize = 5;

/// The amount of redundancy for clickhouse servers
///
Expand Down
2 changes: 2 additions & 0 deletions nexus/reconfigurator/execution/src/dns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ mod test {
use omicron_common::api::external::IdentityMetadataCreateParams;
use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY;
use omicron_common::policy::COCKROACHDB_REDUNDANCY;
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
use omicron_common::policy::NEXUS_REDUNDANCY;
use omicron_common::zpool_name::ZpoolName;
use omicron_test_utils::dev::test_setup_log;
Expand Down Expand Up @@ -1526,6 +1527,7 @@ mod test {
service_nic_rows: &[],
target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY,
target_nexus_zone_count: NEXUS_REDUNDANCY,
target_internal_dns_zone_count: INTERNAL_DNS_REDUNDANCY,
target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY,
target_cockroachdb_cluster_version:
CockroachDbClusterVersion::POLICY,
Expand Down
9 changes: 5 additions & 4 deletions nexus/reconfigurator/execution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ mod tests {
use nexus_db_model::SledSystemHardware;
use nexus_db_model::SledUpdate;
use nexus_db_model::Zpool;
use omicron_common::api::external::Error;
use std::collections::BTreeSet;
use uuid::Uuid;

Expand Down Expand Up @@ -342,10 +343,10 @@ mod tests {
PhysicalDiskKind::U2,
sled_id.into_untyped_uuid(),
);
datastore
.physical_disk_insert(&opctx, disk.clone())
.await
.expect("failed to upsert physical disk");
match datastore.physical_disk_insert(&opctx, disk.clone()).await {
Ok(_) | Err(Error::ObjectAlreadyExists { .. }) => (),
Err(e) => panic!("failed to upsert physical disk: {e}"),
}

if pool_inserted.insert(pool_id) {
let zpool = Zpool::new(
Expand Down
106 changes: 94 additions & 12 deletions nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,17 @@ use nexus_types::external_api::views::SledState;
use omicron_common::address::get_internal_dns_server_addresses;
use omicron_common::address::get_sled_address;
use omicron_common::address::get_switch_zone_address;
use omicron_common::address::ReservedRackSubnet;
use omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES;
use omicron_common::address::DNS_HTTP_PORT;
use omicron_common::address::DNS_PORT;
use omicron_common::address::NTP_PORT;
use omicron_common::address::SLED_RESERVED_ADDRESSES;
use omicron_common::api::external::Generation;
use omicron_common::api::external::Vni;
use omicron_common::api::internal::shared::NetworkInterface;
use omicron_common::api::internal::shared::NetworkInterfaceKind;
use omicron_common::policy::MAX_INTERNAL_DNS_REDUNDANCY;
use omicron_uuid_kinds::ExternalIpKind;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::OmicronZoneKind;
Expand Down Expand Up @@ -73,6 +77,7 @@ use typed_rng::UuidRng;
use super::external_networking::BuilderExternalNetworking;
use super::external_networking::ExternalNetworkingChoice;
use super::external_networking::ExternalSnatNetworkingChoice;
use super::internal_dns::DnsSubnetAllocator;
use super::zones::is_already_expunged;
use super::zones::BuilderZoneState;
use super::zones::BuilderZonesConfig;
Expand Down Expand Up @@ -106,6 +111,12 @@ pub enum Error {
},
#[error("programming error in planner")]
Planner(#[source] anyhow::Error),
#[error("no reserved subnets available for DNS")]
NoAvailableDnsSubnets,
#[error(
"can only have {MAX_INTERNAL_DNS_REDUNDANCY} internal DNS servers"
)]
TooManyDnsServers,
}

/// Describes whether an idempotent "ensure" operation resulted in action taken
Expand Down Expand Up @@ -197,6 +208,7 @@ pub struct BlueprintBuilder<'a> {
input: &'a PlanningInput,
sled_ip_allocators: BTreeMap<SledUuid, IpAllocator>,
external_networking: BuilderExternalNetworking<'a>,
internal_dns_subnets: DnsSubnetAllocator,

// These fields will become part of the final blueprint. See the
// corresponding fields in `Blueprint`.
Expand Down Expand Up @@ -291,6 +303,8 @@ impl<'a> BlueprintBuilder<'a> {

let external_networking =
BuilderExternalNetworking::new(parent_blueprint, input)?;
let internal_dns_subnets =
DnsSubnetAllocator::new(parent_blueprint, input)?;

// Prefer the sled state from our parent blueprint for sleds
// that were in it; there may be new sleds in `input`, in which
Expand Down Expand Up @@ -323,6 +337,7 @@ impl<'a> BlueprintBuilder<'a> {
input,
sled_ip_allocators: BTreeMap::new(),
external_networking,
internal_dns_subnets,
zones: BlueprintZonesBuilder::new(parent_blueprint),
disks: BlueprintDisksBuilder::new(parent_blueprint),
sled_state,
Expand Down Expand Up @@ -619,6 +634,69 @@ impl<'a> BlueprintBuilder<'a> {
Ok(EnsureMultiple::Changed { added, removed })
}

/// Adds one internal DNS zone to `sled_id`.
///
/// Allocates a DNS subnet within the rack's reserved subnet (derived from
/// the sled's own subnet), selects a zpool not already used by another
/// internal DNS zone on this sled, and records the new in-service zone in
/// the builder. Errors propagate from subnet allocation and zpool
/// selection (presumably `NoAvailableDnsSubnets` when the allocator is
/// exhausted — allocator body not visible here, confirm).
fn sled_add_zone_internal_dns(
&mut self,
sled_id: SledUuid,
gz_address_index: u32,
) -> Result<Ensure, Error> {
// Derive the reserved rack subnet from this sled's subnet, then ask
// the allocator for an unused DNS subnet inside it.
let sled_subnet = self.sled_resources(sled_id)?.subnet;
let rack_subnet = ReservedRackSubnet::from_subnet(sled_subnet);
let dns_subnet = self.internal_dns_subnets.alloc(rack_subnet)?;
let address = dns_subnet.dns_address();
let zpool = self.sled_select_zpool(sled_id, ZoneKind::InternalDns)?;
let zone_type =
BlueprintZoneType::InternalDns(blueprint_zone_type::InternalDns {
dataset: OmicronZoneDataset { pool_name: zpool.clone() },
// The DNS and HTTP servers share the zone's underlay address,
// distinguished only by port.
dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0),
http_address: SocketAddrV6::new(address, DNS_HTTP_PORT, 0, 0),
gz_address: dns_subnet.gz_address(),
gz_address_index,
});

let zone = BlueprintZoneConfig {
disposition: BlueprintZoneDisposition::InService,
id: self.rng.zone_rng.next(),
underlay_address: address,
filesystem_pool: Some(zpool),
zone_type,
};

self.sled_add_zone(sled_id, zone)?;
Ok(Ensure::Added)
}

/// Ensures `sled_id` runs `desired_zone_count` internal DNS zones,
/// adding new zones as needed.
///
/// Returns `EnsureMultiple::NotNeeded` when the count already matches.
/// Removing zones is not yet supported: asking for fewer zones than are
/// currently running is a planner error.
pub fn sled_ensure_zone_multiple_internal_dns(
&mut self,
sled_id: SledUuid,
desired_zone_count: usize,
) -> Result<EnsureMultiple, Error> {
// How many internal DNS zones do we need to add?
let count =
self.sled_num_running_zones_of_kind(sled_id, ZoneKind::InternalDns);
let to_add = match desired_zone_count.checked_sub(count) {
Some(0) => return Ok(EnsureMultiple::NotNeeded),
Some(n) => n,
None => {
return Err(Error::Planner(anyhow!(
"removing an internal DNS zone not yet supported \
(sled {sled_id} has {count}; \
planner wants {desired_zone_count})"
)));
}
};

// Each new zone gets the next `gz_address_index` after the existing
// ones (indices `0..count` are presumed taken by the zones already
// running on this sled — TODO confirm against zone construction).
for i in count..desired_zone_count {
self.sled_add_zone_internal_dns(
sled_id,
i.try_into().map_err(|_| {
Error::Planner(anyhow!("zone index overflow"))
})?,
)?;
}

Ok(EnsureMultiple::Changed { added: to_add, removed: 0 })
}

pub fn sled_ensure_zone_ntp(
&mut self,
sled_id: SledUuid,
Expand All @@ -636,14 +714,18 @@ impl<'a> BlueprintBuilder<'a> {
let sled_subnet = sled_info.subnet;
let ip = self.sled_alloc_ip(sled_id)?;
let ntp_address = SocketAddrV6::new(ip, NTP_PORT, 0, 0);

// Construct the list of internal DNS servers.
//
// It'd be tempting to get this list from the other internal NTP
// servers but there may not be any of those. We could also
// construct this list manually from the set of internal DNS servers
// actually deployed. Instead, we take the same approach as RSS:
// these are at known, fixed addresses relative to the AZ subnet
// (which itself is a known-prefix parent subnet of the sled subnet).
// servers, but there may not be any of those. We could also
// construct it manually from the set of internal DNS servers
// actually deployed, or ask the DNS subnet allocator; but those
// would both require that all the internal DNS zones be added
// before any NTP zones, a constraint we don't currently enforce.
// Instead, we take the same approach as RSS: they are at known,
// fixed addresses relative to the AZ subnet (which itself is a
// known-prefix parent subnet of the sled subnet).
let dns_servers =
get_internal_dns_server_addresses(sled_subnet.net().prefix());

Expand Down Expand Up @@ -1139,13 +1221,13 @@ impl<'a> BlueprintBuilder<'a> {
allocator.alloc().ok_or(Error::OutOfAddresses { sled_id })
}

// Selects a zpools for this zone type.
//
// This zpool may be used for either durable storage or transient
// storage - the usage is up to the caller.
//
// If `zone_kind` already exists on `sled_id`, it is prevented
// from using the same zpool as exisitng zones with the same kind.
/// Selects a zpool for this zone type.
///
/// This zpool may be used for either durable storage or transient
/// storage - the usage is up to the caller.
///
/// If `zone_kind` already exists on `sled_id`, it is prevented
/// from using the same zpool as existing zones with the same kind.
fn sled_select_zpool(
&self,
sled_id: SledUuid,
Expand Down
Loading

0 comments on commit e39ff8b

Please sign in to comment.