diff --git a/Cargo.lock b/Cargo.lock
index dd837b9891..b2815d9a1f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4283,12 +4283,14 @@ dependencies = [
  "http 0.2.11",
  "hyper 0.14.27",
  "hyper-rustls 0.26.0",
+ "illumos-utils",
  "internal-dns",
  "ipnetwork",
  "itertools 0.12.0",
  "macaddr",
  "newtype_derive",
  "nexus-db-model",
+ "nexus-deployment",
  "nexus-inventory",
  "nexus-test-utils",
  "nexus-types",
diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs
index fef069d536..ea89923caa 100644
--- a/dev-tools/omdb/src/bin/omdb/nexus.rs
+++ b/dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -866,7 +866,7 @@ async fn cmd_nexus_blueprints_target_show(
         .await
         .context("fetching target blueprint")?;
     println!("target blueprint: {}", target.target_id);
-    println!("set at: {}", target.time_set);
+    println!("made target at: {}", target.time_made_target);
     println!("enabled: {}", target.enabled);
     Ok(())
 }
diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs
new file mode 100644
index 0000000000..34fe08d78c
--- /dev/null
+++ b/nexus/db-model/src/deployment.rs
@@ -0,0 +1,263 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Types for representing the deployed software and configuration in the
+//! database
+
+use crate::inventory::ZoneType;
+use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic};
+use crate::schema::{
+    blueprint, bp_omicron_zone, bp_omicron_zone_nic,
+    bp_omicron_zones_not_in_service, bp_sled_omicron_zones, bp_target,
+};
+use crate::{ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8};
+use chrono::{DateTime, Utc};
+use ipnetwork::IpNetwork;
+use nexus_types::deployment::BlueprintTarget;
+use uuid::Uuid;
+
+/// See [`nexus_types::deployment::Blueprint`].
+#[derive(Queryable, Insertable, Clone, Debug, Selectable)]
+#[diesel(table_name = blueprint)]
+pub struct Blueprint {
+    pub id: Uuid,
+    pub parent_blueprint_id: Option<Uuid>,
+    pub time_created: DateTime<Utc>,
+    pub creator: String,
+    pub comment: String,
+}
+
+impl From<&'_ nexus_types::deployment::Blueprint> for Blueprint {
+    fn from(bp: &'_ nexus_types::deployment::Blueprint) -> Self {
+        Self {
+            id: bp.id,
+            parent_blueprint_id: bp.parent_blueprint_id,
+            time_created: bp.time_created,
+            creator: bp.creator.clone(),
+            comment: bp.comment.clone(),
+        }
+    }
+}
+
+impl From<Blueprint> for nexus_types::deployment::BlueprintMetadata {
+    fn from(value: Blueprint) -> Self {
+        Self {
+            id: value.id,
+            parent_blueprint_id: value.parent_blueprint_id,
+            time_created: value.time_created,
+            creator: value.creator,
+            comment: value.comment,
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::BlueprintTarget`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_target)]
+pub struct BpTarget {
+    pub version: SqlU32,
+    pub blueprint_id: Uuid,
+    pub enabled: bool,
+    pub time_made_target: DateTime<Utc>,
+}
+
+impl BpTarget {
+    pub fn new(version: u32, target: BlueprintTarget) -> Self {
+        Self {
+            version: version.into(),
+            blueprint_id: target.target_id,
+            enabled: target.enabled,
+            time_made_target: target.time_made_target,
+        }
+    }
+}
+
+impl From<BpTarget> for nexus_types::deployment::BlueprintTarget {
+    fn from(value: BpTarget) -> Self {
+        Self {
+            target_id: value.blueprint_id,
+            enabled: value.enabled,
+            time_made_target: value.time_made_target,
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::OmicronZonesConfig`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_sled_omicron_zones)]
+pub struct BpSledOmicronZones {
+    pub blueprint_id: Uuid,
+    pub sled_id: Uuid,
+    pub generation: Generation,
+}
+
+impl BpSledOmicronZones {
+    pub fn new(
+        blueprint_id: Uuid,
+        sled_id: Uuid,
+        zones_config: &nexus_types::deployment::OmicronZonesConfig,
+    ) -> Self {
+        Self {
+            blueprint_id,
+            sled_id,
+            generation: Generation(zones_config.generation),
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::OmicronZoneConfig`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_omicron_zone)]
+pub struct BpOmicronZone {
+    pub blueprint_id: Uuid,
+    pub sled_id: Uuid,
+    pub id: Uuid,
+    pub underlay_address: ipv6::Ipv6Addr,
+    pub zone_type: ZoneType,
+    pub primary_service_ip: ipv6::Ipv6Addr,
+    pub primary_service_port: SqlU16,
+    pub second_service_ip: Option<IpNetwork>,
+    pub second_service_port: Option<SqlU16>,
+    pub dataset_zpool_name: Option<String>,
+    pub bp_nic_id: Option<Uuid>,
+    pub dns_gz_address: Option<ipv6::Ipv6Addr>,
+    pub dns_gz_address_index: Option<SqlU32>,
+    pub ntp_ntp_servers: Option<Vec<String>>,
+    pub ntp_dns_servers: Option<Vec<IpNetwork>>,
+    pub ntp_domain: Option<String>,
+    pub nexus_external_tls: Option<bool>,
+    pub nexus_external_dns_servers: Option<Vec<IpNetwork>>,
+    pub snat_ip: Option<IpNetwork>,
+    pub snat_first_port: Option<SqlU16>,
+    pub snat_last_port: Option<SqlU16>,
+}
+
+impl BpOmicronZone {
+    pub fn new(
+        blueprint_id: Uuid,
+        sled_id: Uuid,
+        zone: &nexus_types::inventory::OmicronZoneConfig,
+    ) -> Result<Self, anyhow::Error> {
+        let zone = OmicronZone::new(sled_id, zone)?;
+        Ok(Self {
+            blueprint_id,
+            sled_id: zone.sled_id,
+            id: zone.id,
+            underlay_address: zone.underlay_address,
+            zone_type: zone.zone_type,
+            primary_service_ip: zone.primary_service_ip,
+            primary_service_port: zone.primary_service_port,
+            second_service_ip: zone.second_service_ip,
+            second_service_port: zone.second_service_port,
+            dataset_zpool_name: zone.dataset_zpool_name,
+            bp_nic_id: zone.nic_id,
+            dns_gz_address: zone.dns_gz_address,
+            dns_gz_address_index: zone.dns_gz_address_index,
+            ntp_ntp_servers: zone.ntp_ntp_servers,
+            ntp_dns_servers: zone.ntp_dns_servers,
+            ntp_domain: zone.ntp_domain,
+            nexus_external_tls: zone.nexus_external_tls,
+            nexus_external_dns_servers: zone.nexus_external_dns_servers,
+            snat_ip: zone.snat_ip,
+            snat_first_port: zone.snat_first_port,
+            snat_last_port: zone.snat_last_port,
+        })
+    }
+
+    pub fn into_omicron_zone_config(
+        self,
+        nic_row: Option<BpOmicronZoneNic>,
+    ) -> Result<nexus_types::inventory::OmicronZoneConfig, anyhow::Error> {
+        let zone = OmicronZone {
+            sled_id: self.sled_id,
+            id: self.id,
+            underlay_address: self.underlay_address,
+            zone_type: self.zone_type,
+            primary_service_ip: self.primary_service_ip,
+            primary_service_port: self.primary_service_port,
+            second_service_ip: self.second_service_ip,
+            second_service_port: self.second_service_port,
+            dataset_zpool_name: self.dataset_zpool_name,
+            nic_id: self.bp_nic_id,
+            dns_gz_address: self.dns_gz_address,
+            dns_gz_address_index: self.dns_gz_address_index,
+            ntp_ntp_servers: self.ntp_ntp_servers,
+            ntp_dns_servers: self.ntp_dns_servers,
+            ntp_domain: self.ntp_domain,
+            nexus_external_tls: self.nexus_external_tls,
+            nexus_external_dns_servers: self.nexus_external_dns_servers,
+            snat_ip: self.snat_ip,
+            snat_first_port: self.snat_first_port,
+            snat_last_port: self.snat_last_port,
+        };
+        zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from))
+    }
+}
+
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_omicron_zone_nic)]
+pub struct BpOmicronZoneNic {
+    blueprint_id: Uuid,
+    pub id: Uuid,
+    name: Name,
+    ip: IpNetwork,
+    mac: MacAddr,
+    subnet: IpNetwork,
+    vni: SqlU32,
+    is_primary: bool,
+    slot: SqlU8,
+}
+
+impl From<BpOmicronZoneNic> for OmicronZoneNic {
+    fn from(value: BpOmicronZoneNic) -> Self {
+        OmicronZoneNic {
+            id: value.id,
+            name: value.name,
+            ip: value.ip,
+            mac: value.mac,
+            subnet: value.subnet,
+            vni: value.vni,
+            is_primary: value.is_primary,
+            slot: value.slot,
+        }
+    }
+}
+
+impl BpOmicronZoneNic {
+    pub fn new(
+        blueprint_id: Uuid,
+        zone: &nexus_types::inventory::OmicronZoneConfig,
+    ) -> Result<Option<Self>, anyhow::Error> {
+        let zone_nic = OmicronZoneNic::new(zone)?;
+        Ok(zone_nic.map(|nic| Self {
+            blueprint_id,
+            id: nic.id,
+            name: nic.name,
+            ip: nic.ip,
+            mac: nic.mac,
+            subnet: nic.subnet,
+            vni: nic.vni,
+            is_primary: nic.is_primary,
+            slot: nic.slot,
+        }))
+    }
+
+    pub fn into_network_interface_for_zone(
+        self,
+        zone_id: Uuid,
+    ) -> Result<nexus_types::inventory::NetworkInterface, anyhow::Error> {
+        let zone_nic = OmicronZoneNic::from(self);
+        zone_nic.into_network_interface_for_zone(zone_id)
+    }
+}
+
+/// Nexus wants to think in terms of "zones in service", but since most zones
+/// of most blueprints are in service, we store the zones NOT in service in
+/// the database. We handle that inversion internally in the db-queries layer.
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_omicron_zones_not_in_service)]
+pub struct BpOmicronZoneNotInService {
+    pub blueprint_id: Uuid,
+    pub bp_omicron_zone_id: Uuid,
+}
diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs
index 17d74be0aa..d8314f97b8 100644
--- a/nexus/db-model/src/inventory.rs
+++ b/nexus/db-model/src/inventory.rs
@@ -4,6 +4,7 @@
 
 //! Types for representing the hardware/software inventory in the database
 
+use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic};
 use crate::schema::{
     hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error,
     inv_omicron_zone, inv_omicron_zone_nic, inv_root_of_trust,
@@ -14,8 +15,7 @@ use crate::{
     impl_enum_type, ipv6, ByteCount, Generation, MacAddr, Name, SqlU16, SqlU32,
     SqlU8,
 };
-use anyhow::{anyhow, ensure};
-use anyhow::{bail, Context};
+use anyhow::anyhow;
 use chrono::DateTime;
 use chrono::Utc;
 use diesel::backend::Backend;
@@ -26,10 +26,8 @@ use diesel::serialize::ToSql;
 use diesel::{serialize, sql_types};
 use ipnetwork::IpNetwork;
 use nexus_types::inventory::{
-    BaseboardId, Caboose, Collection, OmicronZoneType, PowerState, RotPage,
-    RotSlot,
+    BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot,
 };
-use std::net::SocketAddrV6;
 use uuid::Uuid;
 
 // See [`nexus_types::inventory::PowerState`].
@@ -750,165 +748,29 @@ impl InvOmicronZone { sled_id: Uuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result { - let id = zone.id; - let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); - let mut nic_id = None; - let mut dns_gz_address = None; - let mut dns_gz_address_index = None; - let mut ntp_ntp_servers = None; - let mut ntp_dns_servers = None; - let mut ntp_ntp_domain = None; - let mut nexus_external_tls = None; - let mut nexus_external_dns_servers = None; - let mut snat_ip = None; - let mut snat_first_port = None; - let mut snat_last_port = None; - let mut second_service_ip = None; - let mut second_service_port = None; - - let (zone_type, primary_service_sockaddr_str, dataset) = match &zone - .zone_type - { - OmicronZoneType::BoundaryNtp { - address, - ntp_servers, - dns_servers, - domain, - nic, - snat_cfg, - } => { - ntp_ntp_servers = Some(ntp_servers.clone()); - ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); - snat_ip = Some(IpNetwork::from(snat_cfg.ip)); - snat_first_port = Some(SqlU16::from(snat_cfg.first_port)); - snat_last_port = Some(SqlU16::from(snat_cfg.last_port)); - nic_id = Some(nic.id); - (ZoneType::BoundaryNtp, address, None) - } - OmicronZoneType::Clickhouse { address, dataset } => { - (ZoneType::Clickhouse, address, Some(dataset)) - } - OmicronZoneType::ClickhouseKeeper { address, dataset } => { - (ZoneType::ClickhouseKeeper, address, Some(dataset)) - } - OmicronZoneType::CockroachDb { address, dataset } => { - (ZoneType::CockroachDb, address, Some(dataset)) - } - OmicronZoneType::Crucible { address, dataset } => { - (ZoneType::Crucible, address, Some(dataset)) - } - OmicronZoneType::CruciblePantry { address } => { - (ZoneType::CruciblePantry, address, None) - } - OmicronZoneType::ExternalDns { - dataset, - http_address, - dns_address, - nic, - } => { - nic_id = Some(nic.id); - let sockaddr = dns_address - .parse::() - .with_context(|| { - format!( - "parsing address for external DNS server {:?}", - dns_address - ) - })?; - second_service_ip = Some(sockaddr.ip()); - second_service_port = Some(SqlU16::from(sockaddr.port())); - (ZoneType::ExternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalDns { - dataset, - http_address, - dns_address, - gz_address, - gz_address_index, - } => { - dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address)); - dns_gz_address_index = Some(SqlU32::from(*gz_address_index)); - let sockaddr = dns_address - .parse::() - .with_context(|| { - format!( - "parsing address for internal DNS server {:?}", - dns_address - ) - })?; - second_service_ip = Some(sockaddr.ip()); - second_service_port = Some(SqlU16::from(sockaddr.port())); - (ZoneType::InternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalNtp { - address, - ntp_servers, - dns_servers, - domain, - } => { - ntp_ntp_servers = Some(ntp_servers.clone()); - ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); - (ZoneType::InternalNtp, address, None) - } - OmicronZoneType::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers, - } => { - nic_id = Some(nic.id); - nexus_external_tls = Some(*external_tls); - nexus_external_dns_servers = Some(external_dns_servers.clone()); - second_service_ip = Some(*external_ip); - (ZoneType::Nexus, internal_address, None) - } - OmicronZoneType::Oximeter { address } => { - (ZoneType::Oximeter, address, None) - } - }; - - let dataset_zpool_name = - dataset.map(|d| d.pool_name.as_str().to_string()); - let 
primary_service_sockaddr = primary_service_sockaddr_str - .parse::() - .with_context(|| { - format!( - "parsing socket address for primary IP {:?}", - primary_service_sockaddr_str - ) - })?; - let (primary_service_ip, primary_service_port) = ( - ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()), - SqlU16::from(primary_service_sockaddr.port()), - ); - - Ok(InvOmicronZone { + let zone = OmicronZone::new(sled_id, zone)?; + Ok(Self { inv_collection_id, - sled_id, - id, - underlay_address, - zone_type, - primary_service_ip, - primary_service_port, - second_service_ip: second_service_ip.map(IpNetwork::from), - second_service_port, - dataset_zpool_name, - nic_id, - dns_gz_address, - dns_gz_address_index, - ntp_ntp_servers, - ntp_dns_servers: ntp_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - ntp_domain: ntp_ntp_domain, - nexus_external_tls, - nexus_external_dns_servers: nexus_external_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - snat_ip, - snat_first_port, - snat_last_port, + sled_id: zone.sled_id, + id: zone.id, + underlay_address: zone.underlay_address, + zone_type: zone.zone_type, + primary_service_ip: zone.primary_service_ip, + primary_service_port: zone.primary_service_port, + second_service_ip: zone.second_service_ip, + second_service_port: zone.second_service_port, + dataset_zpool_name: zone.dataset_zpool_name, + nic_id: zone.nic_id, + dns_gz_address: zone.dns_gz_address, + dns_gz_address_index: zone.dns_gz_address_index, + ntp_ntp_servers: zone.ntp_ntp_servers, + ntp_dns_servers: zone.ntp_dns_servers, + ntp_domain: zone.ntp_domain, + nexus_external_tls: zone.nexus_external_tls, + nexus_external_dns_servers: zone.nexus_external_dns_servers, + snat_ip: zone.snat_ip, + snat_first_port: zone.snat_first_port, + snat_last_port: zone.snat_last_port, }) } @@ -916,169 +778,29 @@ impl InvOmicronZone { self, nic_row: Option, ) -> Result { - let address = SocketAddrV6::new( - std::net::Ipv6Addr::from(self.primary_service_ip), - *self.primary_service_port, - 0, - 0, - ) - .to_string(); - - // Assemble a value that we can use to extract the NIC _if necessary_ - // and report an error if it was needed but not found. - // - // Any error here should be impossible. By the time we get here, the - // caller should have provided `nic_row` iff there's a corresponding - // `nic_id` in this row, and the ids should match up. And whoever - // created this row ought to have provided a nic_id iff this type of - // zone needs a NIC. This last issue is not under our control, though, - // so we definitely want to handle that as an operational error. The - // others could arguably be programmer errors (i.e., we could `assert`), - // but it seems excessive to crash here. - // - // Note that we immediately return for any of the caller errors here. - // For the other error, we will return only later, if some code path - // below tries to use `nic` when it's not present. - let nic = match (self.nic_id, nic_row) { - (Some(expected_id), Some(nic_row)) => { - ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); - Ok(nic_row.into_network_interface_for_zone(self.id)?) - } - (None, None) => Err(anyhow!( - "expected zone to have an associated NIC, but it doesn't" - )), - (Some(_), None) => bail!("caller provided no NIC"), - (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), - }; - - // Similarly, assemble a value that we can use to extract the dataset, - // if necessary. We only return this error if code below tries to use - // this value. 
- let dataset = self - .dataset_zpool_name - .map(|zpool_name| -> Result<_, anyhow::Error> { - Ok(nexus_types::inventory::OmicronZoneDataset { - pool_name: zpool_name.parse().map_err(|e| { - anyhow!("parsing zpool name {:?}: {}", zpool_name, e) - })?, - }) - }) - .transpose()? - .ok_or_else(|| anyhow!("expected dataset zpool name, found none")); - - // Do the same for the DNS server address. - let dns_address = - match (self.second_service_ip, self.second_service_port) { - (Some(dns_ip), Some(dns_port)) => { - Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port) - .to_string()) - } - _ => Err(anyhow!( - "expected second service IP and port, \ - found one missing" - )), - }; - - // Do the same for NTP zone properties. - let ntp_dns_servers = self - .ntp_dns_servers - .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) - .map(|list| { - list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect() - }); - let ntp_ntp_servers = - self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers")); - - let zone_type = match self.zone_type { - ZoneType::BoundaryNtp => { - let snat_cfg = match ( - self.snat_ip, - self.snat_first_port, - self.snat_last_port, - ) { - (Some(ip), Some(first_port), Some(last_port)) => { - nexus_types::inventory::SourceNatConfig { - ip: ip.ip(), - first_port: *first_port, - last_port: *last_port, - } - } - _ => bail!( - "expected non-NULL snat properties, \ - found at least one NULL" - ), - }; - OmicronZoneType::BoundaryNtp { - address, - dns_servers: ntp_dns_servers?, - domain: self.ntp_domain, - nic: nic?, - ntp_servers: ntp_ntp_servers?, - snat_cfg, - } - } - ZoneType::Clickhouse => { - OmicronZoneType::Clickhouse { address, dataset: dataset? } - } - ZoneType::ClickhouseKeeper => { - OmicronZoneType::ClickhouseKeeper { address, dataset: dataset? } - } - ZoneType::CockroachDb => { - OmicronZoneType::CockroachDb { address, dataset: dataset? } - } - ZoneType::Crucible => { - OmicronZoneType::Crucible { address, dataset: dataset? } - } - ZoneType::CruciblePantry => { - OmicronZoneType::CruciblePantry { address } - } - ZoneType::ExternalDns => OmicronZoneType::ExternalDns { - dataset: dataset?, - dns_address: dns_address?, - http_address: address, - nic: nic?, - }, - ZoneType::InternalDns => OmicronZoneType::InternalDns { - dataset: dataset?, - dns_address: dns_address?, - http_address: address, - gz_address: *self.dns_gz_address.ok_or_else(|| { - anyhow!("expected dns_gz_address, found none") - })?, - gz_address_index: *self.dns_gz_address_index.ok_or_else( - || anyhow!("expected dns_gz_address_index, found none"), - )?, - }, - ZoneType::InternalNtp => OmicronZoneType::InternalNtp { - address, - dns_servers: ntp_dns_servers?, - domain: self.ntp_domain, - ntp_servers: ntp_ntp_servers?, - }, - ZoneType::Nexus => OmicronZoneType::Nexus { - internal_address: address, - nic: nic?, - external_tls: self - .nexus_external_tls - .ok_or_else(|| anyhow!("expected 'external_tls'"))?, - external_ip: self - .second_service_ip - .ok_or_else(|| anyhow!("expected second service IP"))? - .ip(), - external_dns_servers: self - .nexus_external_dns_servers - .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? 
- .into_iter() - .map(|i| i.ip()) - .collect(), - }, - ZoneType::Oximeter => OmicronZoneType::Oximeter { address }, - }; - Ok(nexus_types::inventory::OmicronZoneConfig { + let zone = OmicronZone { + sled_id: self.sled_id, id: self.id, - underlay_address: std::net::Ipv6Addr::from(self.underlay_address), - zone_type, - }) + underlay_address: self.underlay_address, + zone_type: self.zone_type, + primary_service_ip: self.primary_service_ip, + primary_service_port: self.primary_service_port, + second_service_ip: self.second_service_ip, + second_service_port: self.second_service_port, + dataset_zpool_name: self.dataset_zpool_name, + nic_id: self.nic_id, + dns_gz_address: self.dns_gz_address, + dns_gz_address_index: self.dns_gz_address_index, + ntp_ntp_servers: self.ntp_ntp_servers, + ntp_dns_servers: self.ntp_dns_servers, + ntp_domain: self.ntp_domain, + nexus_external_tls: self.nexus_external_tls, + nexus_external_dns_servers: self.nexus_external_dns_servers, + snat_ip: self.snat_ip, + snat_first_port: self.snat_first_port, + snat_last_port: self.snat_last_port, + }; + zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from)) } } @@ -1096,63 +818,45 @@ pub struct InvOmicronZoneNic { slot: SqlU8, } +impl From for OmicronZoneNic { + fn from(value: InvOmicronZoneNic) -> Self { + OmicronZoneNic { + id: value.id, + name: value.name, + ip: value.ip, + mac: value.mac, + subnet: value.subnet, + vni: value.vni, + is_primary: value.is_primary, + slot: value.slot, + } + } +} + impl InvOmicronZoneNic { pub fn new( inv_collection_id: Uuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result, anyhow::Error> { - match &zone.zone_type { - OmicronZoneType::ExternalDns { nic, .. } - | OmicronZoneType::BoundaryNtp { nic, .. } - | OmicronZoneType::Nexus { nic, .. } => { - // We do not bother storing the NIC's kind and associated id - // because it should be inferrable from the other information - // that we have. Verify that here. 
- ensure!( - matches!( - nic.kind, - nexus_types::inventory::NetworkInterfaceKind::Service( - id - ) if id == zone.id - ), - "expected zone's NIC kind to be \"service\" and the \ - id to match the zone's id ({})", - zone.id - ); - - Ok(Some(InvOmicronZoneNic { - inv_collection_id, - id: nic.id, - name: Name::from(nic.name.clone()), - ip: IpNetwork::from(nic.ip), - mac: MacAddr::from(nic.mac), - subnet: IpNetwork::from(nic.subnet.clone()), - vni: SqlU32::from(u32::from(nic.vni)), - is_primary: nic.primary, - slot: SqlU8::from(nic.slot), - })) - } - _ => Ok(None), - } + let zone_nic = OmicronZoneNic::new(zone)?; + Ok(zone_nic.map(|nic| Self { + inv_collection_id, + id: nic.id, + name: nic.name, + ip: nic.ip, + mac: nic.mac, + subnet: nic.subnet, + vni: nic.vni, + is_primary: nic.is_primary, + slot: nic.slot, + })) } pub fn into_network_interface_for_zone( self, zone_id: Uuid, ) -> Result { - Ok(nexus_types::inventory::NetworkInterface { - id: self.id, - ip: self.ip.ip(), - kind: nexus_types::inventory::NetworkInterfaceKind::Service( - zone_id, - ), - mac: *self.mac, - name: self.name.into(), - primary: self.is_primary, - slot: *self.slot, - vni: omicron_common::api::external::Vni::try_from(*self.vni) - .context("parsing VNI")?, - subnet: self.subnet.into(), - }) + let zone_nic = OmicronZoneNic::from(self); + zone_nic.into_network_interface_for_zone(zone_id) } } diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 5c0a68c253..7fa95822a7 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -52,7 +52,9 @@ mod switch_port; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. +mod deployment; mod ipv4_nat_entry; +mod omicron_zone_config; pub mod queries; mod quota; mod rack; @@ -114,6 +116,7 @@ pub use console_session::*; pub use dataset::*; pub use dataset_kind::*; pub use db_metadata::*; +pub use deployment::*; pub use device_auth::*; pub use digest::*; pub use disk::*; diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs new file mode 100644 index 0000000000..f4726ccd92 --- /dev/null +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -0,0 +1,456 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for sharing nontrivial conversions between various `OmicronZoneConfig` +//! database serializations and the corresponding Nexus/sled-agent type +//! +//! Both inventory and deployment have nearly-identical tables to serialize +//! `OmicronZoneConfigs` that are collected or generated, respectively. We +//! expect those tables to diverge over time (e.g., inventory may start +//! collecting extra metadata like uptime). This module provides conversion +//! helpers for the parts of those tables that are common between the two. 
+ +use std::net::SocketAddrV6; + +use crate::inventory::ZoneType; +use crate::{ipv6, MacAddr, Name, SqlU16, SqlU32, SqlU8}; +use anyhow::{anyhow, bail, ensure, Context}; +use ipnetwork::IpNetwork; +use nexus_types::inventory::OmicronZoneType; +use uuid::Uuid; + +#[derive(Debug)] +pub(crate) struct OmicronZone { + pub(crate) sled_id: Uuid, + pub(crate) id: Uuid, + pub(crate) underlay_address: ipv6::Ipv6Addr, + pub(crate) zone_type: ZoneType, + pub(crate) primary_service_ip: ipv6::Ipv6Addr, + pub(crate) primary_service_port: SqlU16, + pub(crate) second_service_ip: Option, + pub(crate) second_service_port: Option, + pub(crate) dataset_zpool_name: Option, + pub(crate) nic_id: Option, + pub(crate) dns_gz_address: Option, + pub(crate) dns_gz_address_index: Option, + pub(crate) ntp_ntp_servers: Option>, + pub(crate) ntp_dns_servers: Option>, + pub(crate) ntp_domain: Option, + pub(crate) nexus_external_tls: Option, + pub(crate) nexus_external_dns_servers: Option>, + pub(crate) snat_ip: Option, + pub(crate) snat_first_port: Option, + pub(crate) snat_last_port: Option, +} + +impl OmicronZone { + pub(crate) fn new( + sled_id: Uuid, + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> anyhow::Result { + let id = zone.id; + let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); + let mut nic_id = None; + let mut dns_gz_address = None; + let mut dns_gz_address_index = None; + let mut ntp_ntp_servers = None; + let mut ntp_dns_servers = None; + let mut ntp_ntp_domain = None; + let mut nexus_external_tls = None; + let mut nexus_external_dns_servers = None; + let mut snat_ip = None; + let mut snat_first_port = None; + let mut snat_last_port = None; + let mut second_service_ip = None; + let mut second_service_port = None; + + let (zone_type, primary_service_sockaddr_str, dataset) = match &zone + .zone_type + { + OmicronZoneType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + } => { + ntp_ntp_servers = Some(ntp_servers.clone()); + ntp_dns_servers = Some(dns_servers.clone()); + ntp_ntp_domain = domain.clone(); + snat_ip = Some(IpNetwork::from(snat_cfg.ip)); + snat_first_port = Some(SqlU16::from(snat_cfg.first_port)); + snat_last_port = Some(SqlU16::from(snat_cfg.last_port)); + nic_id = Some(nic.id); + (ZoneType::BoundaryNtp, address, None) + } + OmicronZoneType::Clickhouse { address, dataset } => { + (ZoneType::Clickhouse, address, Some(dataset)) + } + OmicronZoneType::ClickhouseKeeper { address, dataset } => { + (ZoneType::ClickhouseKeeper, address, Some(dataset)) + } + OmicronZoneType::CockroachDb { address, dataset } => { + (ZoneType::CockroachDb, address, Some(dataset)) + } + OmicronZoneType::Crucible { address, dataset } => { + (ZoneType::Crucible, address, Some(dataset)) + } + OmicronZoneType::CruciblePantry { address } => { + (ZoneType::CruciblePantry, address, None) + } + OmicronZoneType::ExternalDns { + dataset, + http_address, + dns_address, + nic, + } => { + nic_id = Some(nic.id); + let sockaddr = dns_address + .parse::() + .with_context(|| { + format!( + "parsing address for external DNS server {:?}", + dns_address + ) + })?; + second_service_ip = Some(sockaddr.ip()); + second_service_port = Some(SqlU16::from(sockaddr.port())); + (ZoneType::ExternalDns, http_address, Some(dataset)) + } + OmicronZoneType::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + } => { + dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address)); + dns_gz_address_index = Some(SqlU32::from(*gz_address_index)); + let sockaddr = 
dns_address + .parse::() + .with_context(|| { + format!( + "parsing address for internal DNS server {:?}", + dns_address + ) + })?; + second_service_ip = Some(sockaddr.ip()); + second_service_port = Some(SqlU16::from(sockaddr.port())); + (ZoneType::InternalDns, http_address, Some(dataset)) + } + OmicronZoneType::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + } => { + ntp_ntp_servers = Some(ntp_servers.clone()); + ntp_dns_servers = Some(dns_servers.clone()); + ntp_ntp_domain = domain.clone(); + (ZoneType::InternalNtp, address, None) + } + OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + } => { + nic_id = Some(nic.id); + nexus_external_tls = Some(*external_tls); + nexus_external_dns_servers = Some(external_dns_servers.clone()); + second_service_ip = Some(*external_ip); + (ZoneType::Nexus, internal_address, None) + } + OmicronZoneType::Oximeter { address } => { + (ZoneType::Oximeter, address, None) + } + }; + + let dataset_zpool_name = + dataset.map(|d| d.pool_name.as_str().to_string()); + let primary_service_sockaddr = primary_service_sockaddr_str + .parse::() + .with_context(|| { + format!( + "parsing socket address for primary IP {:?}", + primary_service_sockaddr_str + ) + })?; + let (primary_service_ip, primary_service_port) = ( + ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()), + SqlU16::from(primary_service_sockaddr.port()), + ); + + Ok(Self { + sled_id, + id, + underlay_address, + zone_type, + primary_service_ip, + primary_service_port, + second_service_ip: second_service_ip.map(IpNetwork::from), + second_service_port, + dataset_zpool_name, + nic_id, + dns_gz_address, + dns_gz_address_index, + ntp_ntp_servers, + ntp_dns_servers: ntp_dns_servers + .map(|list| list.into_iter().map(IpNetwork::from).collect()), + ntp_domain: ntp_ntp_domain, + nexus_external_tls, + nexus_external_dns_servers: nexus_external_dns_servers + .map(|list| list.into_iter().map(IpNetwork::from).collect()), + snat_ip, + snat_first_port, + snat_last_port, + }) + } + + pub(crate) fn into_omicron_zone_config( + self, + nic_row: Option, + ) -> anyhow::Result { + let address = SocketAddrV6::new( + std::net::Ipv6Addr::from(self.primary_service_ip), + *self.primary_service_port, + 0, + 0, + ) + .to_string(); + + // Assemble a value that we can use to extract the NIC _if necessary_ + // and report an error if it was needed but not found. + // + // Any error here should be impossible. By the time we get here, the + // caller should have provided `nic_row` iff there's a corresponding + // `nic_id` in this row, and the ids should match up. And whoever + // created this row ought to have provided a nic_id iff this type of + // zone needs a NIC. This last issue is not under our control, though, + // so we definitely want to handle that as an operational error. The + // others could arguably be programmer errors (i.e., we could `assert`), + // but it seems excessive to crash here. + // + // Note that we immediately return for any of the caller errors here. + // For the other error, we will return only later, if some code path + // below tries to use `nic` when it's not present. + let nic = match (self.nic_id, nic_row) { + (Some(expected_id), Some(nic_row)) => { + ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); + Ok(nic_row.into_network_interface_for_zone(self.id)?) + } + // We don't expect and don't have a NIC. This is reasonable, so we + // don't `bail!` like we do in the next two cases, but we also + // _don't have a NIC_. 
Put an error into `nic`, and then if we land + // in a zone below that expects one, we'll fail then. + (None, None) => Err(anyhow!( + "expected zone to have an associated NIC, but it doesn't" + )), + (Some(_), None) => bail!("caller provided no NIC"), + (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), + }; + + // Similarly, assemble a value that we can use to extract the dataset, + // if necessary. We only return this error if code below tries to use + // this value. + let dataset = self + .dataset_zpool_name + .map(|zpool_name| -> Result<_, anyhow::Error> { + Ok(nexus_types::inventory::OmicronZoneDataset { + pool_name: zpool_name.parse().map_err(|e| { + anyhow!("parsing zpool name {:?}: {}", zpool_name, e) + })?, + }) + }) + .transpose()? + .ok_or_else(|| anyhow!("expected dataset zpool name, found none")); + + // Do the same for the DNS server address. + let dns_address = + match (self.second_service_ip, self.second_service_port) { + (Some(dns_ip), Some(dns_port)) => { + Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port) + .to_string()) + } + _ => Err(anyhow!( + "expected second service IP and port, \ + found one missing" + )), + }; + + // Do the same for NTP zone properties. + let ntp_dns_servers = self + .ntp_dns_servers + .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) + .map(|list| { + list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect() + }); + let ntp_ntp_servers = + self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers")); + + let zone_type = match self.zone_type { + ZoneType::BoundaryNtp => { + let snat_cfg = match ( + self.snat_ip, + self.snat_first_port, + self.snat_last_port, + ) { + (Some(ip), Some(first_port), Some(last_port)) => { + nexus_types::inventory::SourceNatConfig { + ip: ip.ip(), + first_port: *first_port, + last_port: *last_port, + } + } + _ => bail!( + "expected non-NULL snat properties, \ + found at least one NULL" + ), + }; + OmicronZoneType::BoundaryNtp { + address, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + nic: nic?, + ntp_servers: ntp_ntp_servers?, + snat_cfg, + } + } + ZoneType::Clickhouse => { + OmicronZoneType::Clickhouse { address, dataset: dataset? } + } + ZoneType::ClickhouseKeeper => { + OmicronZoneType::ClickhouseKeeper { address, dataset: dataset? } + } + ZoneType::CockroachDb => { + OmicronZoneType::CockroachDb { address, dataset: dataset? } + } + ZoneType::Crucible => { + OmicronZoneType::Crucible { address, dataset: dataset? } + } + ZoneType::CruciblePantry => { + OmicronZoneType::CruciblePantry { address } + } + ZoneType::ExternalDns => OmicronZoneType::ExternalDns { + dataset: dataset?, + dns_address: dns_address?, + http_address: address, + nic: nic?, + }, + ZoneType::InternalDns => OmicronZoneType::InternalDns { + dataset: dataset?, + dns_address: dns_address?, + http_address: address, + gz_address: *self.dns_gz_address.ok_or_else(|| { + anyhow!("expected dns_gz_address, found none") + })?, + gz_address_index: *self.dns_gz_address_index.ok_or_else( + || anyhow!("expected dns_gz_address_index, found none"), + )?, + }, + ZoneType::InternalNtp => OmicronZoneType::InternalNtp { + address, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + ntp_servers: ntp_ntp_servers?, + }, + ZoneType::Nexus => OmicronZoneType::Nexus { + internal_address: address, + nic: nic?, + external_tls: self + .nexus_external_tls + .ok_or_else(|| anyhow!("expected 'external_tls'"))?, + external_ip: self + .second_service_ip + .ok_or_else(|| anyhow!("expected second service IP"))? 
+ .ip(), + external_dns_servers: self + .nexus_external_dns_servers + .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? + .into_iter() + .map(|i| i.ip()) + .collect(), + }, + ZoneType::Oximeter => OmicronZoneType::Oximeter { address }, + }; + Ok(nexus_types::inventory::OmicronZoneConfig { + id: self.id, + underlay_address: std::net::Ipv6Addr::from(self.underlay_address), + zone_type, + }) + } +} + +#[derive(Debug)] +pub(crate) struct OmicronZoneNic { + pub(crate) id: Uuid, + pub(crate) name: Name, + pub(crate) ip: IpNetwork, + pub(crate) mac: MacAddr, + pub(crate) subnet: IpNetwork, + pub(crate) vni: SqlU32, + pub(crate) is_primary: bool, + pub(crate) slot: SqlU8, +} + +impl OmicronZoneNic { + pub(crate) fn new( + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> anyhow::Result> { + match &zone.zone_type { + OmicronZoneType::ExternalDns { nic, .. } + | OmicronZoneType::BoundaryNtp { nic, .. } + | OmicronZoneType::Nexus { nic, .. } => { + // We do not bother storing the NIC's kind and associated id + // because it should be inferrable from the other information + // that we have. Verify that here. + ensure!( + matches!( + nic.kind, + nexus_types::inventory::NetworkInterfaceKind::Service( + id + ) if id == zone.id + ), + "expected zone's NIC kind to be \"service\" and the \ + id to match the zone's id ({})", + zone.id + ); + + Ok(Some(Self { + id: nic.id, + name: Name::from(nic.name.clone()), + ip: IpNetwork::from(nic.ip), + mac: MacAddr::from(nic.mac), + subnet: IpNetwork::from(nic.subnet.clone()), + vni: SqlU32::from(u32::from(nic.vni)), + is_primary: nic.primary, + slot: SqlU8::from(nic.slot), + })) + } + _ => Ok(None), + } + } + + pub(crate) fn into_network_interface_for_zone( + self, + zone_id: Uuid, + ) -> anyhow::Result { + Ok(nexus_types::inventory::NetworkInterface { + id: self.id, + ip: self.ip.ip(), + kind: nexus_types::inventory::NetworkInterfaceKind::Service( + zone_id, + ), + mac: *self.mac, + name: self.name.into(), + primary: self.is_primary, + slot: *self.slot, + vni: omicron_common::api::external::Vni::try_from(*self.vni) + .context("parsing VNI")?, + subnet: self.subnet.into(), + }) + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index eb71a12f04..ddb5ba8e03 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(27, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(28, 0, 0); table! { disk (id) { @@ -1388,6 +1388,89 @@ table! { } } +/* blueprints */ + +table! { + blueprint (id) { + id -> Uuid, + + parent_blueprint_id -> Nullable, + + time_created -> Timestamptz, + creator -> Text, + comment -> Text, + } +} + +table! { + bp_target (version) { + version -> Int8, + + blueprint_id -> Uuid, + + enabled -> Bool, + time_made_target -> Timestamptz, + } +} + +table! { + bp_sled_omicron_zones (blueprint_id, sled_id) { + blueprint_id -> Uuid, + sled_id -> Uuid, + + generation -> Int8, + } +} + +table! 
{
+    bp_omicron_zone (blueprint_id, id) {
+        blueprint_id -> Uuid,
+        sled_id -> Uuid,
+
+        id -> Uuid,
+        underlay_address -> Inet,
+        zone_type -> crate::ZoneTypeEnum,
+
+        primary_service_ip -> Inet,
+        primary_service_port -> Int4,
+        second_service_ip -> Nullable<Inet>,
+        second_service_port -> Nullable<Int4>,
+        dataset_zpool_name -> Nullable<Text>,
+        bp_nic_id -> Nullable<Uuid>,
+        dns_gz_address -> Nullable<Inet>,
+        dns_gz_address_index -> Nullable<Int8>,
+        ntp_ntp_servers -> Nullable<Array<Text>>,
+        ntp_dns_servers -> Nullable<Array<Inet>>,
+        ntp_domain -> Nullable<Text>,
+        nexus_external_tls -> Nullable<Bool>,
+        nexus_external_dns_servers -> Nullable<Array<Inet>>,
+        snat_ip -> Nullable<Inet>,
+        snat_first_port -> Nullable<Int4>,
+        snat_last_port -> Nullable<Int4>,
+    }
+}
+
+table! {
+    bp_omicron_zone_nic (blueprint_id, id) {
+        blueprint_id -> Uuid,
+        id -> Uuid,
+        name -> Text,
+        ip -> Inet,
+        mac -> Int8,
+        subnet -> Inet,
+        vni -> Int8,
+        is_primary -> Bool,
+        slot -> Int2,
+    }
+}
+
+table! {
+    bp_omicron_zones_not_in_service (blueprint_id, bp_omicron_zone_id) {
+        blueprint_id -> Uuid,
+        bp_omicron_zone_id -> Uuid,
+    }
+}
+
 table! {
     bootstore_keys (key, generation) {
         key -> Text,
diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml
index 3240c54f3f..9cdcc88e6a 100644
--- a/nexus/db-queries/Cargo.toml
+++ b/nexus/db-queries/Cargo.toml
@@ -64,8 +64,10 @@ camino-tempfile.workspace = true
 expectorate.workspace = true
 hyper-rustls.workspace = true
 gateway-client.workspace = true
+illumos-utils.workspace = true
 internal-dns.workspace = true
 itertools.workspace = true
+nexus-deployment.workspace = true
 nexus-inventory.workspace = true
 nexus-test-utils.workspace = true
 omicron-sled-agent.workspace = true
diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs
new file mode 100644
index 0000000000..72adb1d3df
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/deployment.rs
@@ -0,0 +1,1583 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ +use super::DataStore; +use crate::authz; +use crate::authz::ApiResource; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; +use crate::db::DbConnection; +use crate::db::TransactionError; +use anyhow::Context; +use async_bb8_diesel::AsyncConnection; +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::DateTime; +use chrono::Utc; +use diesel::expression::SelectableHelper; +use diesel::pg::Pg; +use diesel::query_builder::AstPass; +use diesel::query_builder::QueryFragment; +use diesel::query_builder::QueryId; +use diesel::result::DatabaseErrorKind; +use diesel::result::Error as DieselError; +use diesel::sql_types; +use diesel::Column; +use diesel::ExpressionMethods; +use diesel::OptionalExtension; +use diesel::QueryDsl; +use diesel::RunQueryDsl; +use nexus_db_model::Blueprint as DbBlueprint; +use nexus_db_model::BpOmicronZone; +use nexus_db_model::BpOmicronZoneNic; +use nexus_db_model::BpOmicronZoneNotInService; +use nexus_db_model::BpSledOmicronZones; +use nexus_db_model::BpTarget; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintMetadata; +use nexus_types::deployment::BlueprintTarget; +use nexus_types::deployment::OmicronZonesConfig; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupType; +use omicron_common::api::external::ResourceType; +use omicron_common::bail_unless; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::num::NonZeroU32; +use uuid::Uuid; + +/// "limit" used in SQL queries that paginate through all sleds, omicron +/// zones, etc. +/// +/// While we always load an entire blueprint in one operation, we use a +/// [`Paginator`] to guard against single queries returning an unchecked number +/// of rows. +// unsafe: `new_unchecked` is only unsound if the argument is 0. +const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; + +impl DataStore { + /// List blueprints + pub async fn blueprints_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::blueprint; + + opctx + .authorize(authz::Action::ListChildren, &authz::BLUEPRINT_CONFIG) + .await?; + + let blueprints = paginated(blueprint::table, blueprint::id, pagparams) + .select(DbBlueprint::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(blueprints.into_iter().map(BlueprintMetadata::from).collect()) + } + + /// Store a complete blueprint into the database + pub async fn blueprint_insert( + &self, + opctx: &OpContext, + blueprint: &Blueprint, + ) -> Result<(), Error> { + opctx + .authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG) + .await?; + + // In the database, the blueprint is represented essentially as a tree + // rooted at a `blueprint` row. Other nodes in the tree point + // back at the `blueprint` via `blueprint_id`. + // + // It's helpful to assemble some values before entering the transaction + // so that we can produce the `Error` type that we want here. 
+ let row_blueprint = DbBlueprint::from(blueprint); + let blueprint_id = row_blueprint.id; + let sled_omicron_zones = blueprint + .omicron_zones + .iter() + .map(|(sled_id, zones_config)| { + BpSledOmicronZones::new(blueprint_id, *sled_id, zones_config) + }) + .collect::>(); + let omicron_zones = blueprint + .omicron_zones + .iter() + .flat_map(|(sled_id, zones_config)| { + zones_config.zones.iter().map(|zone| { + BpOmicronZone::new(blueprint_id, *sled_id, zone) + .map_err(|e| Error::internal_error(&format!("{:#}", e))) + }) + }) + .collect::, Error>>()?; + let omicron_zone_nics = blueprint + .omicron_zones + .values() + .flat_map(|zones_config| { + zones_config.zones.iter().filter_map(|zone| { + BpOmicronZoneNic::new(blueprint_id, zone) + .with_context(|| format!("zone {:?}", zone.id)) + .map_err(|e| Error::internal_error(&format!("{:#}", e))) + .transpose() + }) + }) + .collect::, _>>()?; + + // `Blueprint` stores a set of zones in service, but in the database we + // store the set of zones NOT in service (which we expect to be much + // smaller, often empty). Build that inverted set here. + let omicron_zones_not_in_service = { + let mut zones_not_in_service = Vec::new(); + for zone in &omicron_zones { + if !blueprint.zones_in_service.contains(&zone.id) { + zones_not_in_service.push(BpOmicronZoneNotInService { + blueprint_id, + bp_omicron_zone_id: zone.id, + }); + } + } + zones_not_in_service + }; + + // This implementation inserts all records associated with the + // blueprint in one transaction. This is required: we don't want + // any planner or executor to see a half-inserted blueprint, nor do we + // want to leave a partial blueprint around if we crash. However, it + // does mean this is likely to be a big transaction and if that becomes + // a problem we could break this up as long as we address those + // problems. + // + // The SQL here is written so that it doesn't have to be an + // *interactive* transaction. That is, it should in principle be + // possible to generate all this SQL up front and send it as one big + // batch rather than making a bunch of round-trips to the database. + // We'd do that if we had an interface for doing that with bound + // parameters, etc. See oxidecomputer/omicron#973. + let pool = self.pool_connection_authorized(opctx).await?; + pool.transaction_async(|conn| async move { + // Insert the row for the blueprint. + { + use db::schema::blueprint::dsl; + let _: usize = diesel::insert_into(dsl::blueprint) + .values(row_blueprint) + .execute_async(&conn) + .await?; + } + + // Insert all the Omicron zones for this blueprint. 
+ { + use db::schema::bp_sled_omicron_zones::dsl as sled_zones; + let _ = diesel::insert_into(sled_zones::bp_sled_omicron_zones) + .values(sled_omicron_zones) + .execute_async(&conn) + .await?; + } + + { + use db::schema::bp_omicron_zone::dsl as omicron_zone; + let _ = diesel::insert_into(omicron_zone::bp_omicron_zone) + .values(omicron_zones) + .execute_async(&conn) + .await?; + } + + { + use db::schema::bp_omicron_zone_nic::dsl as omicron_zone_nic; + let _ = + diesel::insert_into(omicron_zone_nic::bp_omicron_zone_nic) + .values(omicron_zone_nics) + .execute_async(&conn) + .await?; + } + + { + use db::schema::bp_omicron_zones_not_in_service::dsl; + let _ = + diesel::insert_into(dsl::bp_omicron_zones_not_in_service) + .values(omicron_zones_not_in_service) + .execute_async(&conn) + .await?; + } + + Ok(()) + }) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + info!( + &opctx.log, + "inserted blueprint"; + "blueprint_id" => %blueprint.id, + ); + + Ok(()) + } + + /// Read a complete blueprint from the database + pub async fn blueprint_read( + &self, + opctx: &OpContext, + authz_blueprint: &authz::Blueprint, + ) -> Result { + opctx.authorize(authz::Action::Read, authz_blueprint).await?; + let conn = self.pool_connection_authorized(opctx).await?; + let blueprint_id = authz_blueprint.id(); + + // Read the metadata from the primary blueprint row, and ensure that it + // exists. + let (parent_blueprint_id, time_created, creator, comment) = { + use db::schema::blueprint::dsl; + + let Some(blueprint) = dsl::blueprint + .filter(dsl::id.eq(blueprint_id)) + .select(DbBlueprint::as_select()) + .get_result_async(&*conn) + .await + .optional() + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })? + else { + return Err(authz_blueprint.not_found()); + }; + + ( + blueprint.parent_blueprint_id, + blueprint.time_created, + blueprint.creator, + blueprint.comment, + ) + }; + + // Read this blueprint's `bp_sled_omicron_zones` rows, which describes + // the `OmicronZonesConfig` generation number for each sled that is a + // part of this blueprint. Construct the BTreeMap we ultimately need, + // but all the `zones` vecs will be empty until our next query below. + let mut omicron_zones: BTreeMap = { + use db::schema::bp_sled_omicron_zones::dsl; + + let mut omicron_zones = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_sled_omicron_zones, + dsl::sled_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpSledOmicronZones::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|s| s.sled_id); + + for s in batch { + let old = omicron_zones.insert( + s.sled_id, + OmicronZonesConfig { + generation: *s.generation, + zones: Vec::new(), + }, + ); + bail_unless!( + old.is_none(), + "found duplicate sled ID in bp_sled_omicron_zones: {}", + s.sled_id + ); + } + } + + omicron_zones + }; + + // Assemble a mutable map of all the NICs found, by NIC id. As we + // match these up with the corresponding zone below, we'll remove items + // from this set. That way we can tell if the same NIC was used twice + // or not used at all. 
+ let mut omicron_zone_nics = { + use db::schema::bp_omicron_zone_nic::dsl; + + let mut omicron_zone_nics = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_omicron_zone_nic, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZoneNic::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|n| n.id); + + for n in batch { + let nic_id = n.id; + let old = omicron_zone_nics.insert(nic_id, n); + bail_unless!( + old.is_none(), + "found duplicate NIC ID in bp_omicron_zone_nic: {}", + nic_id, + ); + } + } + + omicron_zone_nics + }; + + // Load the list of not-in-service zones. Similar to NICs, we'll use a + // mutable set of zone IDs so we can tell if a zone we expected to be + // inactive wasn't present in the blueprint at all. + let mut omicron_zones_not_in_service = { + use db::schema::bp_omicron_zones_not_in_service::dsl; + + let mut omicron_zones_not_in_service = BTreeSet::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_omicron_zones_not_in_service, + dsl::bp_omicron_zone_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZoneNotInService::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|z| z.bp_omicron_zone_id); + + for z in batch { + let inserted = omicron_zones_not_in_service + .insert(z.bp_omicron_zone_id); + bail_unless!( + inserted, + "found duplicate zone ID in \ + bp_omicron_zones_not_in_service: {}", + z.bp_omicron_zone_id, + ); + } + } + + omicron_zones_not_in_service + }; + + // Create the in-memory list of zones _in_ service, which we'll + // calculate below as we load zones. (Any zone that isn't present in + // `omicron_zones_not_in_service` is considered in service.) + let mut zones_in_service = BTreeSet::new(); + + // Load all the zones for each sled. + { + use db::schema::bp_omicron_zone::dsl; + + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + // `paginated` implicitly orders by our `id`, which is also + // handy for testing: the zones are always consistently ordered + let batch = paginated( + dsl::bp_omicron_zone, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZone::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|z| z.id); + + for z in batch { + let nic_row = z + .bp_nic_id + .map(|id| { + // This error means that we found a row in + // bp_omicron_zone that references a NIC by id but + // there's no corresponding row in + // bp_omicron_zone_nic with that id. This should be + // impossible and reflects either a bug or database + // corruption. + omicron_zone_nics.remove(&id).ok_or_else(|| { + Error::internal_error(&format!( + "zone {:?}: expected to find NIC {:?}, \ + but didn't", + z.id, z.bp_nic_id + )) + }) + }) + .transpose()?; + let sled_zones = + omicron_zones.get_mut(&z.sled_id).ok_or_else(|| { + // This error means that we found a row in + // bp_omicron_zone with no associated record in + // bp_sled_omicron_zones. 
This should be + // impossible and reflects either a bug or database + // corruption. + Error::internal_error(&format!( + "zone {:?}: unknown sled: {:?}", + z.id, z.sled_id + )) + })?; + let zone_id = z.id; + let zone = z + .into_omicron_zone_config(nic_row) + .with_context(|| { + format!("zone {:?}: parse from database", zone_id) + }) + .map_err(|e| { + Error::internal_error(&format!( + "{:#}", + e.to_string() + )) + })?; + sled_zones.zones.push(zone); + + // If we can remove `zone_id` from + // `omicron_zones_not_in_service`, then the zone is not in + // service. Otherwise, add it to the list of in-service + // zones. + if !omicron_zones_not_in_service.remove(&zone_id) { + zones_in_service.insert(zone_id); + } + } + } + } + + bail_unless!( + omicron_zone_nics.is_empty(), + "found extra Omicron zone NICs: {:?}", + omicron_zone_nics.keys() + ); + bail_unless!( + omicron_zones_not_in_service.is_empty(), + "found extra Omicron zones not in service: {:?}", + omicron_zones_not_in_service, + ); + + Ok(Blueprint { + id: blueprint_id, + omicron_zones, + zones_in_service, + parent_blueprint_id, + time_created, + creator, + comment, + }) + } + + /// Delete a blueprint from the database + pub async fn blueprint_delete( + &self, + opctx: &OpContext, + authz_blueprint: &authz::Blueprint, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Delete, authz_blueprint).await?; + let blueprint_id = authz_blueprint.id(); + + // As with inserting a whole blueprint, we remove it in one big + // transaction. Similar considerations apply. We could + // break it up if these transactions become too big. But we'd need a + // way to stop other clients from discovering a collection after we + // start removing it and we'd also need to make sure we didn't leak a + // collection if we crash while deleting it. + let conn = self.pool_connection_authorized(opctx).await?; + + let ( + nblueprints, + nsled_agent_zones, + nzones, + nnics, + nzones_not_in_service, + ) = conn + .transaction_async(|conn| async move { + // Ensure that blueprint we're about to delete is not the + // current target. + let current_target = + self.blueprint_current_target_only(&conn).await?; + if let Some(current_target) = current_target { + if current_target.target_id == blueprint_id { + return Err(TransactionError::CustomError( + Error::conflict(format!( + "blueprint {blueprint_id} is the \ + current target and cannot be deleted", + )), + )); + } + } + + // Remove the record describing the blueprint itself. + let nblueprints = { + use db::schema::blueprint::dsl; + diesel::delete( + dsl::blueprint.filter(dsl::id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + // Bail out if this blueprint didn't exist; there won't be + // references to it in any of the remaining tables either, since + // deletion always goes through this transaction. + if nblueprints == 0 { + return Err(TransactionError::CustomError( + authz_blueprint.not_found(), + )); + } + + // Remove rows associated with Omicron zones + let nsled_agent_zones = { + use db::schema::bp_sled_omicron_zones::dsl; + diesel::delete( + dsl::bp_sled_omicron_zones + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + let nzones = { + use db::schema::bp_omicron_zone::dsl; + diesel::delete( + dsl::bp_omicron_zone + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? 
+ }; + + let nnics = { + use db::schema::bp_omicron_zone_nic::dsl; + diesel::delete( + dsl::bp_omicron_zone_nic + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + let nzones_not_in_service = { + use db::schema::bp_omicron_zones_not_in_service::dsl; + diesel::delete( + dsl::bp_omicron_zones_not_in_service + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + Ok(( + nblueprints, + nsled_agent_zones, + nzones, + nnics, + nzones_not_in_service, + )) + }) + .await + .map_err(|error| match error { + TransactionError::CustomError(e) => e, + TransactionError::Database(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; + + info!(&opctx.log, "removed blueprint"; + "blueprint_id" => blueprint_id.to_string(), + "nblueprints" => nblueprints, + "nsled_agent_zones" => nsled_agent_zones, + "nzones" => nzones, + "nnics" => nnics, + "nzones_not_in_service" => nzones_not_in_service, + ); + + Ok(()) + } + + /// Set the current target blueprint + /// + /// In order to become the target blueprint, `target`'s parent blueprint + /// must be the current target + pub async fn blueprint_target_set_current( + &self, + opctx: &OpContext, + target: BlueprintTarget, + ) -> Result<(), Error> { + opctx + .authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG) + .await?; + + let query = InsertTargetQuery { + target_id: target.target_id, + enabled: target.enabled, + time_made_target: target.time_made_target, + }; + + let conn = self.pool_connection_authorized(opctx).await?; + + query + .execute_async(&*conn) + .await + .map_err(|e| Error::from(query.decode_error(e)))?; + + Ok(()) + } + + /// Get the current target blueprint, if one exists + /// + /// Returns both the metadata about the target and the full blueprint + /// contents. If you only need the target metadata, use + /// `blueprint_target_get_current` instead. + pub async fn blueprint_target_get_current_full( + &self, + opctx: &OpContext, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?; + + let conn = self.pool_connection_authorized(opctx).await?; + let Some(target) = self.blueprint_current_target_only(&conn).await? + else { + return Ok(None); + }; + + // The blueprint for the current target cannot be deleted while it is + // the current target, but it's possible someone else (a) made a new + // blueprint the target and (b) deleted the blueprint pointed to by our + // `target` between the above query and the below query. In such a case, + // this query will fail with an "unknown blueprint ID" error. This + // should be rare in practice. + let authz_blueprint = authz_blueprint_from_id(target.target_id); + let blueprint = self.blueprint_read(opctx, &authz_blueprint).await?; + + Ok(Some((target, blueprint))) + } + + /// Get the current target blueprint, if one exists + pub async fn blueprint_target_get_current( + &self, + opctx: &OpContext, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?; + let conn = self.pool_connection_authorized(opctx).await?; + self.blueprint_current_target_only(&conn).await + } + + // Helper to fetch the current blueprint target (without fetching the entire + // blueprint for that target). + // + // Caller is responsible for checking authz for this operation. 
+    async fn blueprint_current_target_only(
+        &self,
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+    ) -> Result<Option<BlueprintTarget>, Error> {
+        use db::schema::bp_target::dsl;
+
+        let current_target = dsl::bp_target
+            .order_by(dsl::version.desc())
+            .first_async::<BpTarget>(conn)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(current_target.map(BlueprintTarget::from))
+    }
+}
+
+// Helper to create an `authz::Blueprint` for a specific blueprint ID
+fn authz_blueprint_from_id(blueprint_id: Uuid) -> authz::Blueprint {
+    authz::Blueprint::new(
+        authz::FLEET,
+        blueprint_id,
+        LookupType::ById(blueprint_id),
+    )
+}
+
+/// Errors related to inserting a target blueprint
+#[derive(Debug)]
+enum InsertTargetError {
+    /// The requested target blueprint ID does not exist in the blueprint table.
+    NoSuchBlueprint(Uuid),
+    /// The requested target blueprint's parent does not match the current
+    /// target.
+    ParentNotTarget(Uuid),
+    /// Any other error
+    Other(DieselError),
+}
+
+impl From<InsertTargetError> for Error {
+    fn from(value: InsertTargetError) -> Self {
+        match value {
+            InsertTargetError::NoSuchBlueprint(id) => {
+                Error::not_found_by_id(ResourceType::Blueprint, &id)
+            }
+            InsertTargetError::ParentNotTarget(id) => {
+                Error::invalid_request(format!(
+                    "Blueprint {id}'s parent blueprint is not the current \
+                    target blueprint"
+                ))
+            }
+            InsertTargetError::Other(e) => {
+                public_error_from_diesel(e, ErrorHandler::Server)
+            }
+        }
+    }
+}
+
+/// Query to insert a new current target blueprint.
+///
+/// The `bp_target` table's primary key is the `version` field, and we enforce
+/// the following invariants:
+///
+/// * The first "current target" blueprint is assigned version 1.
+/// * In order to be inserted as the first current target blueprint, a
+///   blueprint must have a parent_blueprint_id of NULL.
+/// * After the first, any subsequent blueprint can only be assigned as the
+///   current target if its parent_blueprint_id is the current target blueprint.
+/// * When inserting a new child blueprint as the current target, it is assigned
+///   a version of 1 + its parent's version.
+///
+/// The result of this is a linear history of blueprints, where each target is a
+/// direct child of the previous current target. Enforcing the above has some
+/// subtleties (particularly around handling the "first blueprint with no
+/// parent" case). These are expanded on below through inline comments on the
+/// query we generate:
+///
+/// ```sql
+/// WITH
+/// -- Subquery to fetch the current target (i.e., the row with the max
+/// -- version in `bp_target`).
+/// current_target AS (
+/// SELECT
+/// "version" AS version,
+/// "blueprint_id" AS blueprint_id
+/// FROM "bp_target"
+/// ORDER BY "version" DESC
+/// LIMIT 1
+/// ),
+///
+/// -- Error checking subquery: This uses similar tricks as elsewhere in
+/// -- this crate to `CAST(... AS UUID)` with non-UUID values that result
+/// -- in runtime errors in specific cases, allowing us to give accurate
+/// -- error messages.
+/// --
+/// -- These checks are not required for correct behavior by the insert
+/// -- below. If we removed them, the insert would simply insert 0 rows in
+/// -- the cases where these checks fail. But they make it easier to report
+/// -- specific problems to our caller.
+/// --
+/// -- The specific cases we check here are noted below.
+/// check_validity AS MATERIALIZED (
+/// SELECT CAST(IF(
+/// -- Return `no-such-blueprint` if the ID we're being told to
+/// -- set as the target doesn't exist in the blueprint table.
+/// (SELECT "id" FROM "blueprint" WHERE "id" = <new_target_id>) IS NULL,
+/// 'no-such-blueprint',
+/// IF(
+/// -- Check for whether our new target's parent matches our current
+/// -- target. There are two cases here: The first is the common case
+/// -- (i.e., the new target has a parent: does it match the current
+/// -- target ID?). The second is the bootstrapping check: if we're
+/// -- trying to insert a new target that does not have a parent,
+/// -- we should not have a current target at all.
+/// --
+/// -- If either of these cases fails, we return `parent-not-target`.
+/// (
+/// SELECT "parent_blueprint_id" FROM "blueprint", current_target
+/// WHERE
+/// "id" = <new_target_id>
+/// AND current_target.blueprint_id = "parent_blueprint_id"
+/// ) IS NOT NULL
+/// OR
+/// (
+/// SELECT 1 FROM "blueprint"
+/// WHERE
+/// "id" = <new_target_id>
+/// AND "parent_blueprint_id" IS NULL
+/// AND NOT EXISTS (SELECT version FROM current_target)
+/// ) = 1,
+/// <new_target_id>,
+/// 'parent-not-target'
+/// )
+/// ) AS UUID)
+/// ),
+///
+/// -- Determine the new version number to use: either 1 if this is the
+/// -- first blueprint being made the current target, or 1 higher than
+/// -- the previous target's version.
+/// --
+/// -- The final clauses of each of these WHERE clauses repeat the
+/// -- checks performed above in `check_validity`, and will cause this
+/// -- subquery to return no rows if we should not allow the new
+/// -- target to be set.
+/// new_target AS (
+/// SELECT 1 AS new_version FROM "blueprint"
+/// WHERE
+/// "id" = <new_target_id>
+/// AND "parent_blueprint_id" IS NULL
+/// AND NOT EXISTS (SELECT version FROM current_target)
+/// UNION
+/// SELECT current_target.version + 1 FROM current_target, "blueprint"
+/// WHERE
+/// "id" = <new_target_id>
+/// AND "parent_blueprint_id" IS NOT NULL
+/// AND "parent_blueprint_id" = current_target.blueprint_id
+/// )
+///
+/// -- Perform the actual insertion.
+/// INSERT INTO "bp_target"(
+/// "version","blueprint_id","enabled","time_made_target"
+/// )
+/// SELECT
+/// new_target.new_version,
+/// <new_target_id>,
+/// <new_target_enabled>,
+/// <new_target_time_made_target>
+/// FROM new_target
+/// ```
+#[derive(Debug, Clone, Copy)]
+struct InsertTargetQuery {
+    target_id: Uuid,
+    enabled: bool,
+    time_made_target: DateTime<Utc>,
+}
+
+// Uncastable sentinel used to detect when we attempt to make a blueprint the
+// target but it does not exist in the blueprint table.
+const NO_SUCH_BLUEPRINT_SENTINEL: &str = "no-such-blueprint";
+
+// Uncastable sentinel used to detect when we attempt to make a blueprint the
+// target but its parent_blueprint_id is not the current target.
+const PARENT_NOT_TARGET_SENTINEL: &str = "parent-not-target";
+
+// Error messages generated from the above sentinel values.
+const NO_SUCH_BLUEPRINT_ERROR_MESSAGE: &str = + "could not parse \"no-such-blueprint\" as type uuid: \ + uuid: incorrect UUID length: no-such-blueprint"; +const PARENT_NOT_TARGET_ERROR_MESSAGE: &str = + "could not parse \"parent-not-target\" as type uuid: \ + uuid: incorrect UUID length: parent-not-target"; + +impl InsertTargetQuery { + fn decode_error(&self, err: DieselError) -> InsertTargetError { + match err { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == NO_SUCH_BLUEPRINT_ERROR_MESSAGE => + { + InsertTargetError::NoSuchBlueprint(self.target_id) + } + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == PARENT_NOT_TARGET_ERROR_MESSAGE => + { + InsertTargetError::ParentNotTarget(self.target_id) + } + other => InsertTargetError::Other(other), + } + } +} + +impl QueryId for InsertTargetQuery { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for InsertTargetQuery { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + use crate::db::schema::blueprint::dsl as bp_dsl; + use crate::db::schema::bp_target::dsl; + + type FromClause = + diesel::internal::table_macro::StaticQueryFragmentInstance; + type BpTargetFromClause = FromClause; + type BlueprintFromClause = FromClause; + const BP_TARGET_FROM_CLAUSE: BpTargetFromClause = + BpTargetFromClause::new(); + const BLUEPRINT_FROM_CLAUSE: BlueprintFromClause = + BlueprintFromClause::new(); + + out.push_sql("WITH "); + + out.push_sql("current_target AS (SELECT "); + out.push_identifier(dsl::version::NAME)?; + out.push_sql(" AS version,"); + out.push_identifier(dsl::blueprint_id::NAME)?; + out.push_sql(" AS blueprint_id FROM "); + BP_TARGET_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" ORDER BY "); + out.push_identifier(dsl::version::NAME)?; + out.push_sql(" DESC LIMIT 1),"); + + out.push_sql( + "check_validity AS MATERIALIZED ( \ + SELECT \ + CAST( \ + IF( \ + (SELECT ", + ); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" FROM "); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(") IS NULL, "); + out.push_bind_param::( + &NO_SUCH_BLUEPRINT_SENTINEL, + )?; + out.push_sql( + ", \ + IF( \ + (SELECT ", + ); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql(" FROM "); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(", current_target WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND current_target.blueprint_id = "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql( + " ) IS NOT NULL \ + OR \ + (SELECT 1 FROM ", + ); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql( + " IS NULL \ + AND NOT EXISTS ( \ + SELECT version FROM current_target) \ + ) = 1, ", + ); + out.push_bind_param::(&self.target_id)?; + out.push_sql(", "); + out.push_bind_param::( + &PARENT_NOT_TARGET_SENTINEL, + )?; + out.push_sql( + " ) \ + ) \ + AS UUID) \ + ), ", + ); + + out.push_sql("new_target AS (SELECT 1 AS new_version FROM "); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; 
+ out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql( + " IS NULL \ + AND NOT EXISTS \ + (SELECT version FROM current_target) \ + UNION \ + SELECT current_target.version + 1 FROM \ + current_target, ", + ); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql(" IS NOT NULL AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql(" = current_target.blueprint_id) "); + + out.push_sql("INSERT INTO "); + BP_TARGET_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql("("); + out.push_identifier(dsl::version::NAME)?; + out.push_sql(","); + out.push_identifier(dsl::blueprint_id::NAME)?; + out.push_sql(","); + out.push_identifier(dsl::enabled::NAME)?; + out.push_sql(","); + out.push_identifier(dsl::time_made_target::NAME)?; + out.push_sql(") SELECT new_target.new_version, "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(","); + out.push_bind_param::(&self.enabled)?; + out.push_sql(","); + out.push_bind_param::>( + &self.time_made_target, + )?; + out.push_sql(" FROM new_target"); + + Ok(()) + } +} + +impl RunQueryDsl for InsertTargetQuery {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::datastore_test; + use nexus_deployment::blueprint_builder::BlueprintBuilder; + use nexus_deployment::blueprint_builder::Ensure; + use nexus_inventory::now_db_precision; + use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::Policy; + use nexus_types::deployment::SledResources; + use nexus_types::inventory::Collection; + use omicron_common::address::Ipv6Subnet; + use omicron_test_utils::dev; + use rand::thread_rng; + use rand::Rng; + use std::mem; + use std::net::Ipv6Addr; + + static EMPTY_POLICY: Policy = Policy { sleds: BTreeMap::new() }; + + // This is a not-super-future-maintainer-friendly helper to check that all + // the subtables related to blueprints have been pruned of a specific + // blueprint ID. If additional blueprint tables are added in the future, + // this function will silently ignore them unless they're manually added. + async fn ensure_blueprint_fully_deleted( + datastore: &DataStore, + blueprint_id: Uuid, + ) { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + macro_rules! query_count { + ($table:ident, $blueprint_id_col:ident) => {{ + use db::schema::$table::dsl; + let result = dsl::$table + .filter(dsl::$blueprint_id_col.eq(blueprint_id)) + .count() + .get_result_async(&*conn) + .await; + (stringify!($table), result) + }}; + } + + for (table_name, result) in [ + query_count!(blueprint, id), + query_count!(bp_omicron_zone, blueprint_id), + query_count!(bp_omicron_zone_nic, blueprint_id), + query_count!(bp_omicron_zones_not_in_service, blueprint_id), + ] { + let count: i64 = result.unwrap(); + assert_eq!( + count, 0, + "nonzero row count for blueprint \ + {blueprint_id} in table {table_name}" + ); + } + } + + // Create a fake set of `SledResources`, either with a subnet matching + // `ip` or with an arbitrary one. 
+ fn fake_sled_resources(ip: Option) -> SledResources { + use illumos_utils::zpool::ZpoolName; + let zpools = (0..4) + .map(|_| { + let name = ZpoolName::new_external(Uuid::new_v4()).to_string(); + name.parse().unwrap() + }) + .collect(); + let ip = ip.unwrap_or_else(|| thread_rng().gen::().into()); + SledResources { zpools, subnet: Ipv6Subnet::new(ip) } + } + + // Create a `Policy` that contains all the sleds found in `collection` + fn policy_from_collection(collection: &Collection) -> Policy { + Policy { + sleds: collection + .sled_agents + .iter() + .map(|(sled_id, agent)| { + // `Collection` doesn't currently hold zpool names, so + // we'll construct fake resources for each sled. + ( + *sled_id, + fake_sled_resources(Some( + *agent.sled_agent_address.ip(), + )), + ) + }) + .collect(), + } + } + + fn representative() -> (Collection, Policy, Blueprint) { + // We'll start with a representative collection... + let mut collection = + nexus_inventory::examples::representative().builder.build(); + + // ...and then mutate it such that the omicron zones it reports match + // the sled agent IDs it reports. Steal the sled agent info and drop the + // fake sled-agent IDs: + let mut empty_map = BTreeMap::new(); + mem::swap(&mut empty_map, &mut collection.sled_agents); + let mut sled_agents = empty_map.into_values().collect::>(); + + // Now reinsert them with IDs pulled from the omicron zones. This + // assumes we have more fake sled agents than omicron zones, which is + // currently true for the representative collection. + for &sled_id in collection.omicron_zones.keys() { + let some_sled_agent = sled_agents.pop().expect( + "fewer representative sled agents than \ + representative omicron zones sleds", + ); + collection.sled_agents.insert(sled_id, some_sled_agent); + } + + let policy = policy_from_collection(&collection); + let blueprint = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "test", + ) + .unwrap(); + + (collection, policy, blueprint) + } + + async fn blueprint_list_all_ids( + opctx: &OpContext, + datastore: &DataStore, + ) -> Vec { + datastore + .blueprints_list(opctx, &DataPageParams::max_page()) + .await + .unwrap() + .into_iter() + .map(|bp| bp.id) + .collect() + } + + #[tokio::test] + async fn test_empty_blueprint() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create an empty collection and a blueprint from it + let collection = + nexus_inventory::CollectionBuilder::new("test").build(); + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &EMPTY_POLICY, + "test", + ) + .unwrap(); + let authz_blueprint = authz_blueprint_from_id(blueprint1.id); + + // Trying to read it from the database should fail with the relevant + // "not found" error. + let err = datastore + .blueprint_read(&opctx, &authz_blueprint) + .await + .unwrap_err(); + assert_eq!(err, authz_blueprint.not_found()); + + // Write it to the database and read it back. + datastore + .blueprint_insert(&opctx, &blueprint1) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint) + .await + .expect("failed to read collection back"); + assert_eq!(blueprint1, blueprint_read); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint1.id] + ); + + // There ought to be no sleds or zones in service, and no parent + // blueprint. 
+ assert_eq!(blueprint1.omicron_zones.len(), 0); + assert_eq!(blueprint1.zones_in_service.len(), 0); + assert_eq!(blueprint1.parent_blueprint_id, None); + + // Trying to insert the same blueprint again should fail. + let err = + datastore.blueprint_insert(&opctx, &blueprint1).await.unwrap_err(); + assert!(err.to_string().contains("duplicate key")); + + // Delete the blueprint and ensure it's really gone. + datastore.blueprint_delete(&opctx, &authz_blueprint).await.unwrap(); + ensure_blueprint_fully_deleted(&datastore, blueprint1.id).await; + assert_eq!(blueprint_list_all_ids(&opctx, &datastore).await, []); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_representative_blueprint() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a cohesive representative collection/policy/blueprint + let (collection, mut policy, blueprint1) = representative(); + let authz_blueprint1 = authz_blueprint_from_id(blueprint1.id); + + // Write it to the database and read it back. + datastore + .blueprint_insert(&opctx, &blueprint1) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint1) + .await + .expect("failed to read collection back"); + assert_eq!(blueprint1, blueprint_read); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint1.id] + ); + + // Check the number of blueprint elements against our collection. + assert_eq!(blueprint1.omicron_zones.len(), policy.sleds.len()); + assert_eq!( + blueprint1.omicron_zones.len(), + collection.omicron_zones.len() + ); + assert_eq!( + blueprint1.all_omicron_zones().count(), + collection.all_omicron_zones().count() + ); + // All zones should be in service. + assert_eq!( + blueprint1.zones_in_service.len(), + blueprint1.all_omicron_zones().count() + ); + assert_eq!(blueprint1.parent_blueprint_id, None); + + // Set blueprint1 as the current target, and ensure that we cannot + // delete it (as the current target cannot be deleted). + let bp1_target = BlueprintTarget { + target_id: blueprint1.id, + enabled: true, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp1_target, blueprint1.clone())) + ); + let err = datastore + .blueprint_delete(&opctx, &authz_blueprint1) + .await + .unwrap_err(); + assert!( + err.to_string().contains(&format!( + "blueprint {} is the current target and cannot be deleted", + blueprint1.id + )), + "unexpected error: {err}" + ); + + // Add a new sled to `policy`. + let new_sled_id = Uuid::new_v4(); + policy.sleds.insert(new_sled_id, fake_sled_resources(None)); + let new_sled_zpools = &policy.sleds.get(&new_sled_id).unwrap().zpools; + + // Create a builder for a child blueprint. + let mut builder = + BlueprintBuilder::new_based_on(&blueprint1, &policy, "test"); + + // Add zones to our new sled. 
+ assert_eq!( + builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), + Ensure::Added + ); + for zpool_name in new_sled_zpools { + assert_eq!( + builder + .sled_ensure_zone_crucible(new_sled_id, zpool_name.clone()) + .unwrap(), + Ensure::Added + ); + } + let num_new_sled_zones = 1 + new_sled_zpools.len(); + + let blueprint2 = builder.build(); + let authz_blueprint2 = authz_blueprint_from_id(blueprint2.id); + + // Check that we added the new sled and its zones. + assert_eq!( + blueprint1.omicron_zones.len() + 1, + blueprint2.omicron_zones.len() + ); + assert_eq!( + blueprint1.all_omicron_zones().count() + num_new_sled_zones, + blueprint2.all_omicron_zones().count() + ); + + // All zones should be in service. + assert_eq!( + blueprint2.zones_in_service.len(), + blueprint2.all_omicron_zones().count() + ); + assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); + + // Check that we can write it to the DB and read it back. + datastore + .blueprint_insert(&opctx, &blueprint2) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint2) + .await + .expect("failed to read collection back"); + println!("diff: {}", blueprint2.diff(&blueprint_read)); + assert_eq!(blueprint2, blueprint_read); + { + let mut expected_ids = [blueprint1.id, blueprint2.id]; + expected_ids.sort(); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + expected_ids + ); + } + + // Set blueprint2 as the current target and ensure that means we can not + // delete it. + let bp2_target = BlueprintTarget { + target_id: blueprint2.id, + enabled: true, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp2_target, blueprint2.clone())) + ); + let err = datastore + .blueprint_delete(&opctx, &authz_blueprint2) + .await + .unwrap_err(); + assert!( + err.to_string().contains(&format!( + "blueprint {} is the current target and cannot be deleted", + blueprint2.id + )), + "unexpected error: {err}" + ); + + // Now that blueprint2 is the target, we should be able to delete + // blueprint1. + datastore.blueprint_delete(&opctx, &authz_blueprint1).await.unwrap(); + ensure_blueprint_fully_deleted(&datastore, blueprint1.id).await; + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint2.id] + ); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_set_target() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Trying to insert a target that doesn't reference a blueprint should + // fail with a relevant error message. + let nonexistent_blueprint_id = Uuid::new_v4(); + let err = datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: nonexistent_blueprint_id, + enabled: true, + time_made_target: now_db_precision(), + }, + ) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::NoSuchBlueprint( + nonexistent_blueprint_id + )) + ); + + // There should be no current target still. 
+ assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + None + ); + + // Create three blueprints: + // * `blueprint1` has no parent + // * `blueprint2` and `blueprint3` both have `blueprint1` as parent + let collection = + nexus_inventory::CollectionBuilder::new("test").build(); + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &EMPTY_POLICY, + "test1", + ) + .unwrap(); + let blueprint2 = + BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test2") + .build(); + let blueprint3 = + BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test3") + .build(); + assert_eq!(blueprint1.parent_blueprint_id, None); + assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); + assert_eq!(blueprint3.parent_blueprint_id, Some(blueprint1.id)); + + // Insert all three into the blueprint table. + datastore.blueprint_insert(&opctx, &blueprint1).await.unwrap(); + datastore.blueprint_insert(&opctx, &blueprint2).await.unwrap(); + datastore.blueprint_insert(&opctx, &blueprint3).await.unwrap(); + + let bp1_target = BlueprintTarget { + target_id: blueprint1.id, + enabled: true, + time_made_target: now_db_precision(), + }; + let bp2_target = BlueprintTarget { + target_id: blueprint2.id, + enabled: true, + time_made_target: now_db_precision(), + }; + let bp3_target = BlueprintTarget { + target_id: blueprint3.id, + enabled: true, + time_made_target: now_db_precision(), + }; + + // Attempting to make blueprint2 the current target should fail because + // it has a non-NULL parent_blueprint_id, but there is no current target + // (i.e., only a blueprint with no parent can be made the current + // target). + let err = datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint2.id)) + ); + + // There should be no current target still. + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + None + ); + + // We should be able to insert blueprint1, which has no parent (matching + // the currently-empty `bp_target` table's lack of a target). + datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp1_target, blueprint1.clone())) + ); + + // Now that blueprint1 is the current target, we should be able to + // insert blueprint2 or blueprint3. WLOG, pick blueprint3. + datastore + .blueprint_target_set_current(&opctx, bp3_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp3_target, blueprint3.clone())) + ); + + // Now that blueprint3 is the target, trying to insert blueprint1 or + // blueprint2 should fail, because neither of their parents (NULL and + // blueprint1, respectively) match the current target. + let err = datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint1.id)) + ); + let err = datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint2.id)) + ); + + // Create a child of blueprint3, and ensure when we set it as the target + // with enabled=false, that status is serialized. 
+ let blueprint4 = + BlueprintBuilder::new_based_on(&blueprint3, &EMPTY_POLICY, "test3") + .build(); + assert_eq!(blueprint4.parent_blueprint_id, Some(blueprint3.id)); + datastore.blueprint_insert(&opctx, &blueprint4).await.unwrap(); + let bp4_target = BlueprintTarget { + target_id: blueprint4.id, + enabled: false, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp4_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp4_target, blueprint4)) + ); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 78a7aeda87..96832b25bf 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -54,6 +54,7 @@ mod certificate; mod console_session; mod dataset; mod db_metadata; +mod deployment; mod device_auth; mod disk; mod dns; diff --git a/nexus/deployment/Cargo.toml b/nexus/deployment/Cargo.toml index b166f947bf..115dec98a5 100644 --- a/nexus/deployment/Cargo.toml +++ b/nexus/deployment/Cargo.toml @@ -9,6 +9,7 @@ chrono.workspace = true internal-dns.workspace = true ipnet.workspace = true ipnetwork.workspace = true +nexus-inventory.workspace = true nexus-types.workspace = true omicron-common.workspace = true slog.workspace = true @@ -18,6 +19,5 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] -nexus-inventory.workspace = true omicron-test-utils.workspace = true sled-agent-client.workspace = true diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 689e2d8e2c..ac2fe70e6b 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -9,6 +9,7 @@ use anyhow::anyhow; use internal_dns::config::Host; use internal_dns::config::ZoneVariant; use ipnet::IpAdd; +use nexus_inventory::now_db_precision; use nexus_types::deployment::Blueprint; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; @@ -94,7 +95,7 @@ impl<'a> BlueprintBuilder<'a> { .sleds .keys() .map(|sled_id| { - let zones = collection + let mut zones = collection .omicron_zones .get(sled_id) .map(|z| z.zones.clone()) @@ -118,6 +119,11 @@ impl<'a> BlueprintBuilder<'a> { sled_id )) })?; + + // This is not strictly necessary. But for testing, it's + // helpful for things to be in sorted order. + zones.zones.sort_by_key(|zone| zone.id); + Ok((*sled_id, zones)) }) .collect::>()?; @@ -125,10 +131,10 @@ impl<'a> BlueprintBuilder<'a> { collection.all_omicron_zones().map(|z| z.id).collect(); Ok(Blueprint { id: Uuid::new_v4(), - omicron_zones: omicron_zones, + omicron_zones, zones_in_service, parent_blueprint_id: None, - time_created: chrono::Utc::now(), + time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("from collection {}", collection.id), }) @@ -162,7 +168,7 @@ impl<'a> BlueprintBuilder<'a> { .map(|sled_id| { // Start with self.omicron_zones, which contains entries for any // sled whose zones config is changing in this blueprint. - let zones = self + let mut zones = self .omicron_zones .remove(sled_id) // If it's not there, use the config from the parent @@ -180,15 +186,20 @@ impl<'a> BlueprintBuilder<'a> { generation: Generation::new(), zones: vec![], }); + + // This is not strictly necessary. But for testing, it's + // helpful for things to be in sorted order. 
+ zones.zones.sort_by_key(|zone| zone.id); + (*sled_id, zones) }) .collect(); Blueprint { id: Uuid::new_v4(), - omicron_zones: omicron_zones, + omicron_zones, zones_in_service: self.zones_in_service, parent_blueprint_id: Some(self.parent_blueprint.id), - time_created: chrono::Utc::now(), + time_created: now_db_precision(), creator: self.creator, comment: self.comments.join(", "), } diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 62d338c1ee..08a905143c 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -96,7 +96,7 @@ impl CollectionBuilder { pub fn new(collector: &str) -> Self { CollectionBuilder { errors: vec![], - time_started: now(), + time_started: now_db_precision(), collector: collector.to_owned(), baseboards: BTreeSet::new(), cabooses: BTreeSet::new(), @@ -122,7 +122,7 @@ impl CollectionBuilder { id: Uuid::new_v4(), errors: self.errors.into_iter().map(|e| e.to_string()).collect(), time_started: self.time_started, - time_done: now(), + time_done: now_db_precision(), collector: self.collector, baseboards: self.baseboards, cabooses: self.cabooses, @@ -178,7 +178,7 @@ impl CollectionBuilder { // Separate the SP state into the SP-specific state and the RoT state, // if any. - let now = now(); + let now = now_db_precision(); let _ = self.sps.entry(baseboard.clone()).or_insert_with(|| { ServiceProcessor { time_collected: now, @@ -279,7 +279,7 @@ impl CollectionBuilder { if let Some(previous) = by_id.insert( baseboard.clone(), CabooseFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_owned(), caboose: sw_caboose.clone(), }, @@ -348,7 +348,7 @@ impl CollectionBuilder { if let Some(previous) = by_id.insert( baseboard.clone(), RotPageFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_owned(), page: sw_rot_page.clone(), }, @@ -456,7 +456,7 @@ impl CollectionBuilder { usable_hardware_threads: inventory.usable_hardware_threads, usable_physical_ram: inventory.usable_physical_ram, reservoir_size: inventory.reservoir_size, - time_collected: now(), + time_collected: now_db_precision(), sled_id, }; @@ -491,7 +491,7 @@ impl CollectionBuilder { self.omicron_zones.insert( sled_id, OmicronZonesFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_string(), sled_id, zones, @@ -507,7 +507,7 @@ impl CollectionBuilder { /// This exists because the database doesn't store nanosecond-precision, so if /// we store nanosecond-precision timestamps, then DateTime conversion is lossy /// when round-tripping through the database. That's rather inconvenient. -fn now() -> DateTime { +pub fn now_db_precision() -> DateTime { let ts = Utc::now(); let nanosecs = ts.timestamp_subsec_nanos(); let micros = ts.timestamp_subsec_micros(); @@ -517,7 +517,7 @@ fn now() -> DateTime { #[cfg(test)] mod test { - use super::now; + use super::now_db_precision; use super::CollectionBuilder; use crate::examples::representative; use crate::examples::sp_state; @@ -541,10 +541,10 @@ mod test { // Verify the contents of an empty collection. #[test] fn test_empty() { - let time_before = now(); + let time_before = now_db_precision(); let builder = CollectionBuilder::new("test_empty"); let collection = builder.build(); - let time_after = now(); + let time_after = now_db_precision(); assert!(collection.errors.is_empty()); assert!(time_before <= collection.time_started); @@ -577,7 +577,7 @@ mod test { // a useful quick check. 
#[test] fn test_basic() { - let time_before = now(); + let time_before = now_db_precision(); let Representative { builder, sleds: [sled1_bb, sled2_bb, sled3_bb, sled4_bb], @@ -587,7 +587,7 @@ mod test { [sled_agent_id_basic, sled_agent_id_extra, sled_agent_id_pc, sled_agent_id_unknown], } = representative(); let collection = builder.build(); - let time_after = now(); + let time_after = now_db_precision(); println!("{:#?}", collection); assert!(time_before <= collection.time_started); assert!(collection.time_started <= collection.time_done); diff --git a/nexus/inventory/src/lib.rs b/nexus/inventory/src/lib.rs index f11af8fede..6dee7bb7ec 100644 --- a/nexus/inventory/src/lib.rs +++ b/nexus/inventory/src/lib.rs @@ -27,6 +27,8 @@ pub use builder::CollectionBuilder; pub use builder::CollectorBug; pub use builder::InventoryError; +pub use builder::now_db_precision; + pub use collector::Collector; pub use sled_agent_enumerator::SledAgentEnumerator; diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 9439cdc6d5..b9718a0367 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -5,13 +5,12 @@ //! Configuration of the deployment system use nexus_db_queries::authz; -use nexus_db_queries::authz::Action; -use nexus_db_queries::authz::ApiResource; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; use nexus_deployment::planner::Planner; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::deployment::Policy; @@ -27,7 +26,6 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -47,28 +45,6 @@ const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; const SQL_LIMIT_INVENTORY: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; -/// Temporary in-memory store of blueprints -/// -/// Blueprints eventually need to be stored in the database. That will obviate -/// the need for this structure. -pub struct Blueprints { - all_blueprints: BTreeMap, - target: BlueprintTarget, -} - -impl Blueprints { - pub fn new() -> Blueprints { - Blueprints { - all_blueprints: BTreeMap::new(), - target: BlueprintTarget { - target_id: None, - enabled: false, - time_set: chrono::Utc::now(), - }, - } - } -} - /// Common structure for collecting information that the planner needs struct PlanningContext { policy: Policy, @@ -76,30 +52,14 @@ struct PlanningContext { } impl super::Nexus { - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. 
pub async fn blueprint_list( &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(Action::ListChildren, &authz::BLUEPRINT_CONFIG).await?; - Ok(self - .blueprints - .lock() - .unwrap() - .all_blueprints - .values() - .filter_map(|f| match pagparams.marker { - None => Some(f.clone()), - Some(marker) if f.id > *marker => Some(f.clone()), - _ => None, - }) - .collect()) + ) -> ListResultVec { + self.db_datastore.blueprints_list(opctx, pagparams).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. pub async fn blueprint_view( &self, opctx: &OpContext, @@ -110,18 +70,9 @@ impl super::Nexus { blueprint_id, LookupType::ById(blueprint_id), ); - opctx.authorize(Action::Read, &blueprint).await?; - self.blueprints - .lock() - .unwrap() - .all_blueprints - .get(&blueprint_id) - .cloned() - .ok_or_else(|| blueprint.not_found()) + self.db_datastore.blueprint_read(opctx, &blueprint).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. pub async fn blueprint_delete( &self, opctx: &OpContext, @@ -132,90 +83,35 @@ impl super::Nexus { blueprint_id, LookupType::ById(blueprint_id), ); - opctx.authorize(Action::Delete, &blueprint).await?; - - let mut blueprints = self.blueprints.lock().unwrap(); - if let Some(target_id) = blueprints.target.target_id { - if target_id == blueprint_id { - return Err(Error::conflict(format!( - "blueprint {} is the current target and cannot be deleted", - blueprint_id - ))); - } - } - - if blueprints.all_blueprints.remove(&blueprint_id).is_none() { - return Err(blueprint.not_found()); - } - - Ok(()) + self.db_datastore.blueprint_delete(opctx, &blueprint).await } pub async fn blueprint_target_view( &self, opctx: &OpContext, - ) -> Result { - self.blueprint_target(opctx).await.map(|(target, _)| target) - } - - // This is a stand-in for a datastore function that fetches the current - // target information and the target blueprint's contents. This helper - // exists to combine the authz check with the lookup, which is what the - // datastore function will eventually do. - async fn blueprint_target( - &self, - opctx: &OpContext, - ) -> Result<(BlueprintTarget, Option), Error> { - opctx.authorize(Action::Read, &authz::BLUEPRINT_CONFIG).await?; - let blueprints = self.blueprints.lock().unwrap(); - Ok(( - blueprints.target.clone(), - blueprints.target.target_id.and_then(|target_id| { - blueprints.all_blueprints.get(&target_id).cloned() - }), - )) + ) -> Result, Error> { + self.db_datastore.blueprint_target_get_current(opctx).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. 
pub async fn blueprint_target_set( &self, opctx: &OpContext, params: BlueprintTargetSet, ) -> Result { - opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; - let new_target_id = params.target_id; - let enabled = params.enabled; - let mut blueprints = self.blueprints.lock().unwrap(); - if let Some(blueprint) = blueprints.all_blueprints.get(&new_target_id) { - if blueprint.parent_blueprint_id != blueprints.target.target_id { - return Err(Error::conflict(&format!( - "blueprint {:?}: parent is {:?}, which is not the current \ - target {:?}", - new_target_id, - blueprint - .parent_blueprint_id - .map(|p| p.to_string()) - .unwrap_or_else(|| String::from("")), - blueprints - .target - .target_id - .map(|p| p.to_string()) - .unwrap_or_else(|| String::from("")), - ))); - } - blueprints.target = BlueprintTarget { - target_id: Some(new_target_id), - enabled, - time_set: chrono::Utc::now(), - }; + let new_target = BlueprintTarget { + target_id: params.target_id, + enabled: params.enabled, + time_made_target: chrono::Utc::now(), + }; + + self.db_datastore + .blueprint_target_set_current(opctx, new_target) + .await?; + + // When we add a background task executing the target blueprint, + // this is the point where we'd signal it to update its target. - // When we add a background task executing the target blueprint, - // this is the point where we'd signal it to update its target. - Ok(blueprints.target.clone()) - } else { - Err(Error::not_found_by_id(ResourceType::Blueprint, &new_target_id)) - } + Ok(new_target) } async fn blueprint_planning_context( @@ -286,20 +182,12 @@ impl super::Nexus { Ok(PlanningContext { creator, policy: Policy { sleds } }) } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. async fn blueprint_add( &self, opctx: &OpContext, - blueprint: Blueprint, + blueprint: &Blueprint, ) -> Result<(), Error> { - opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; - let mut blueprints = self.blueprints.lock().unwrap(); - assert!(blueprints - .all_blueprints - .insert(blueprint.id, blueprint) - .is_none()); - Ok(()) + self.db_datastore.blueprint_insert(opctx, blueprint).await } pub async fn blueprint_generate_from_collection( @@ -329,7 +217,7 @@ impl super::Nexus { )) })?; - self.blueprint_add(&opctx, blueprint.clone()).await?; + self.blueprint_add(&opctx, &blueprint).await?; Ok(blueprint) } @@ -337,8 +225,9 @@ impl super::Nexus { &self, opctx: &OpContext, ) -> CreateResult { - let (_, maybe_parent) = self.blueprint_target(opctx).await?; - let Some(parent_blueprint) = maybe_parent else { + let maybe_target = + self.db_datastore.blueprint_target_get_current_full(opctx).await?; + let Some((_, parent_blueprint)) = maybe_target else { return Err(Error::conflict( "cannot regenerate blueprint without existing target", )); @@ -358,7 +247,7 @@ impl super::Nexus { )) })?; - self.blueprint_add(&opctx, blueprint.clone()).await?; + self.blueprint_add(&opctx, &blueprint).await?; Ok(blueprint) } } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index d6ad7c98ea..bf8522452a 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -183,10 +183,6 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, - - /// information about blueprints (deployment configurations) - // This will go away once these are stored in the database. 
- blueprints: std::sync::Mutex, } impl Nexus { @@ -419,7 +415,6 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), - blueprints: std::sync::Mutex::new(deployment::Blueprints::new()), }; // TODO-cleanup all the extra Arcs here seems wrong diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 58038cb37a..0122d9b439 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -26,6 +26,8 @@ use dropshot::TypedBody; use hyper::Body; use nexus_db_model::Ipv4NatEntryView; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintMetadata; +use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; @@ -45,7 +47,6 @@ use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; use serde::Deserialize; -use serde::Serialize; use std::collections::BTreeMap; use std::sync::Arc; use uuid::Uuid; @@ -620,7 +621,7 @@ async fn ipv4_nat_changeset( async fn blueprint_list( rqctx: RequestContext>, query_params: Query, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -631,7 +632,7 @@ async fn blueprint_list( Ok(HttpResponseOk(ScanById::results_page( &query, blueprints, - &|_, blueprint: &Blueprint| blueprint.id, + &|_, blueprint: &BlueprintMetadata| blueprint.id, )?)) }; @@ -680,35 +681,6 @@ async fn blueprint_delete( // Managing the current target blueprint -/// Describes what blueprint, if any, the system is currently working toward -#[derive(Debug, Serialize, JsonSchema)] -pub struct BlueprintTarget { - /// id of the blueprint that the system is trying to make real - pub target_id: Uuid, - /// policy: should the system actively work towards this blueprint - /// - /// This should generally be left enabled. 
- pub enabled: bool, - /// when this blueprint was made the target - pub time_set: chrono::DateTime, -} - -impl TryFrom for BlueprintTarget { - type Error = Error; - - fn try_from( - value: nexus_types::deployment::BlueprintTarget, - ) -> Result { - Ok(BlueprintTarget { - target_id: value.target_id.ok_or_else(|| { - Error::conflict("no target blueprint has been configured") - })?, - enabled: value.enabled, - time_set: value.time_set, - }) - } -} - /// Fetches the current target blueprint, if any #[endpoint { method = GET, @@ -721,8 +693,11 @@ async fn blueprint_target_view( let handler = async { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; - let target = nexus.blueprint_target_view(&opctx).await?; - Ok(HttpResponseOk(BlueprintTarget::try_from(target)?)) + let target = + nexus.blueprint_target_view(&opctx).await?.ok_or_else(|| { + Error::conflict("no target blueprint has been configured") + })?; + Ok(HttpResponseOk(target)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } @@ -741,11 +716,8 @@ async fn blueprint_target_set( let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; let target = target.into_inner(); - let result = nexus.blueprint_target_set(&opctx, target).await?; - Ok(HttpResponseOk( - BlueprintTarget::try_from(result) - .map_err(|e| Error::conflict(e.to_string()))?, - )) + let target = nexus.blueprint_target_set(&opctx, target).await?; + Ok(HttpResponseOk(target)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 95404a2c17..3b4c3b3142 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -16,6 +16,7 @@ pub use crate::inventory::OmicronZoneConfig; pub use crate::inventory::OmicronZoneDataset; pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; +pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -184,13 +185,39 @@ impl Blueprint { } } -/// Describes which blueprint the system is currently trying to make real -// This is analogous to the db model type until we have that. -#[derive(Debug, Clone)] +/// Describe high-level metadata about a blueprint +// These fields are a subset of [`Blueprint`], and include only the data we can +// quickly fetch from the main blueprint table (e.g., when listing all +// blueprints). +#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Serialize)] +pub struct BlueprintMetadata { + /// unique identifier for this blueprint + pub id: Uuid, + + /// which blueprint this blueprint is based on + pub parent_blueprint_id: Option, + + /// when this blueprint was generated (for debugging) + pub time_created: chrono::DateTime, + /// identity of the component that generated the blueprint (for debugging) + /// This would generally be the Uuid of a Nexus instance. 
+ pub creator: String, + /// human-readable string describing why this blueprint was created + /// (for debugging) + pub comment: String, +} + +/// Describes what blueprint, if any, the system is currently working toward +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, JsonSchema)] pub struct BlueprintTarget { - pub target_id: Option, + /// id of the blueprint that the system is trying to make real + pub target_id: Uuid, + /// policy: should the system actively work towards this blueprint + /// + /// This should generally be left enabled. pub enabled: bool, - pub time_set: chrono::DateTime, + /// when this blueprint was made the target + pub time_made_target: chrono::DateTime, } /// Specifies what blueprint, if any, the system should be working toward diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 8b0807d52c..bc26736b37 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -164,7 +164,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/BlueprintResultsPage" + "$ref": "#/components/schemas/BlueprintMetadataResultsPage" } } } @@ -2132,7 +2132,43 @@ "zones_in_service" ] }, - "BlueprintResultsPage": { + "BlueprintMetadata": { + "description": "Describe high-level metadata about a blueprint", + "type": "object", + "properties": { + "comment": { + "description": "human-readable string describing why this blueprint was created (for debugging)", + "type": "string" + }, + "creator": { + "description": "identity of the component that generated the blueprint (for debugging) This would generally be the Uuid of a Nexus instance.", + "type": "string" + }, + "id": { + "description": "unique identifier for this blueprint", + "type": "string", + "format": "uuid" + }, + "parent_blueprint_id": { + "nullable": true, + "description": "which blueprint this blueprint is based on", + "type": "string", + "format": "uuid" + }, + "time_created": { + "description": "when this blueprint was generated (for debugging)", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "comment", + "creator", + "id", + "time_created" + ] + }, + "BlueprintMetadataResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -2140,7 +2176,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/Blueprint" + "$ref": "#/components/schemas/BlueprintMetadata" } }, "next_page": { @@ -2166,7 +2202,7 @@ "type": "string", "format": "uuid" }, - "time_set": { + "time_made_target": { "description": "when this blueprint was made the target", "type": "string", "format": "date-time" @@ -2175,7 +2211,7 @@ "required": [ "enabled", "target_id", - "time_set" + "time_made_target" ] }, "BlueprintTargetSet": { diff --git a/schema/crdb/28.0.0/up1.sql b/schema/crdb/28.0.0/up1.sql new file mode 100644 index 0000000000..fda4e3ed5c --- /dev/null +++ b/schema/crdb/28.0.0/up1.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( + id UUID PRIMARY KEY, + parent_blueprint_id UUID, + time_created TIMESTAMPTZ NOT NULL, + creator TEXT NOT NULL, + comment TEXT NOT NULL +); diff --git a/schema/crdb/28.0.0/up2.sql b/schema/crdb/28.0.0/up2.sql new file mode 100644 index 0000000000..a51c1a31fa --- /dev/null +++ b/schema/crdb/28.0.0/up2.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_zones ( + blueprint_id UUID NOT NULL, + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); 
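A minimal sketch of how these blueprint tables compose at read time, assuming the schema introduced by up1.sql through up3.sql (the join and the UUID literal below are illustrative only; the datastore's `blueprint_read` instead filters each `bp_*` table on `blueprint_id` separately, in paginated batches):

-- List every zone recorded for one blueprint, grouped by sled, together with
-- that sled's zone-config generation. The blueprint ID is a placeholder.
SELECT
    z.sled_id,
    s.generation,
    z.id AS zone_id,
    z.zone_type
FROM omicron.public.bp_omicron_zone AS z
JOIN omicron.public.bp_sled_omicron_zones AS s
    ON s.blueprint_id = z.blueprint_id AND s.sled_id = z.sled_id
WHERE z.blueprint_id = '00000000-0000-0000-0000-000000000000'
ORDER BY z.sled_id, z.id;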
diff --git a/schema/crdb/28.0.0/up3.sql b/schema/crdb/28.0.0/up3.sql new file mode 100644 index 0000000000..55e09ca719 --- /dev/null +++ b/schema/crdb/28.0.0/up3.sql @@ -0,0 +1,31 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( + blueprint_id UUID NOT NULL, + sled_id UUID NOT NULL, + id UUID NOT NULL, + underlay_address INET NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + dataset_zpool_name TEXT, + bp_nic_id UUID, + dns_gz_address INET, + dns_gz_address_index INT8, + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (blueprint_id, id) +); diff --git a/schema/crdb/28.0.0/up4.sql b/schema/crdb/28.0.0/up4.sql new file mode 100644 index 0000000000..beff4da802 --- /dev/null +++ b/schema/crdb/28.0.0/up4.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( + blueprint_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); diff --git a/schema/crdb/28.0.0/up5.sql b/schema/crdb/28.0.0/up5.sql new file mode 100644 index 0000000000..72c34400a3 --- /dev/null +++ b/schema/crdb/28.0.0/up5.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zones_not_in_service ( + blueprint_id UUID NOT NULL, + bp_omicron_zone_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, bp_omicron_zone_id) +); diff --git a/schema/crdb/28.0.0/up6.sql b/schema/crdb/28.0.0/up6.sql new file mode 100644 index 0000000000..41e69ca3da --- /dev/null +++ b/schema/crdb/28.0.0/up6.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_target ( + version INT8 PRIMARY KEY, + blueprint_id UUID NOT NULL, + enabled BOOL NOT NULL, + time_made_target TIMESTAMPTZ NOT NULL +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index c91bb669a9..86d1340379 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2954,8 +2954,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( -- service in them) primary_service_ip INET NOT NULL, primary_service_port INT4 - CHECK (primary_service_port BETWEEN 0 AND 65535) - NOT NULL, + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, -- The remaining properties may be NULL for different kinds of zones. The -- specific constraints are not enforced at the database layer, basically @@ -2967,7 +2967,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( second_service_ip INET, second_service_port INT4 CHECK (second_service_port IS NULL - OR second_service_port BETWEEN 0 AND 65535), + OR second_service_port BETWEEN 0 AND 65535), -- Zones may have an associated dataset. They're currently always on a U.2. 
-- The only thing we need to identify it here is the name of the zpool that @@ -2995,9 +2995,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( -- Source NAT configuration (currently used for boundary NTP only) snat_ip INET, snat_first_port INT4 - CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), snat_last_port INT4 - CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), PRIMARY KEY (inv_collection_id, id) ); @@ -3016,6 +3016,200 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( PRIMARY KEY (inv_collection_id, id) ); +/* + * System-level blueprints + * + * See RFD 457 and 459 for context. + * + * A blueprint describes a potential system configuration. The primary table is + * the `blueprint` table, which stores only a small amount of metadata about the + * blueprint. The bulk of the information is stored in the `bp_*` tables below, + * each of which references back to `blueprint` by ID. + * + * `bp_target` describes the "target blueprints" of the system. Insertion must + * follow a strict set of rules: + * + * * The first target blueprint must have version=1, and must have no parent + * blueprint. + * * The Nth target blueprint must have version=N, and its parent blueprint must + * be the blueprint that was the target at version=N-1. + * + * The result is that the current target blueprint can always be found by + * looking at the maximally-versioned row in `bp_target`, and there is a linear + * history from that blueprint all the way back to the version=1 blueprint. We + * will eventually prune old blueprint targets, so it will not always be + * possible to view the entire history. + * + * `bp_sled_omicron_zones`, `bp_omicron_zone`, and `bp_omicron_zone_nic` are + * nearly identical to their `inv_*` counterparts, and record the + * `OmicronZonesConfig` for each sled. + * + * `bp_omicron_zones_not_in_service` stores a list of Omicron zones (present in + * `bp_omicron_zone`) that are NOT in service; e.g., should not appear in + * internal DNS. Nexus's in-memory `Blueprint` representation stores the set of + * zones that ARE in service. We invert that logic at this layer because we + * expect most blueprints to have a relatively large number of omicron zones, + * almost all of which will be in service. This is a minor and perhaps + * unnecessary optimization at the database layer, but it's also relatively + * simple and hidden by the relevant read and insert queries in + * `nexus-db-queries`. + */ + +-- list of all blueprints +CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( + id UUID PRIMARY KEY, + + -- This is effectively a foreign key back to this table; however, it is + -- allowed to be NULL: the initial blueprint has no parent. Additionally, + -- it may be non-NULL but no longer reference a row in this table: once a + -- child blueprint has been created from a parent, it's possible for the + -- parent to be deleted. We do not NULL out this field on such a deletion, + -- so we can always see that there had been a particular parent even if it's + -- now gone. + parent_blueprint_id UUID, + + -- These fields are for debugging only. 
+ time_created TIMESTAMPTZ NOT NULL, + creator TEXT NOT NULL, + comment TEXT NOT NULL +); + +-- table describing both the current and historical target blueprints of the +-- system +CREATE TABLE IF NOT EXISTS omicron.public.bp_target ( + -- Monotonically increasing version for all bp_targets + version INT8 PRIMARY KEY, + + -- Effectively a foreign key into the `blueprint` table, but may reference a + -- blueprint that has been deleted (if this target is no longer the current + -- target: the current target must not be deleted). + blueprint_id UUID NOT NULL, + + -- Is this blueprint enabled? + -- + -- Currently, we have no code that acts on this value; however, it exists as + -- an escape hatch for when we have automated blueprint planning and execution. + -- An operator can set the current blueprint to disabled, which should stop + -- planning and execution (presumably until a support case can address + -- whatever issue the update system is causing). + enabled BOOL NOT NULL, + + -- Timestamp for when this blueprint was made the current target + time_made_target TIMESTAMPTZ NOT NULL +); + +-- see inv_sled_omicron_zones, which is identical except it references a +-- collection whereas this table references a blueprint +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_zones ( + -- foreign key into `blueprint` table + blueprint_id UUID NOT NULL, + + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); + +-- description of omicron zones specified in a blueprint +-- +-- This is currently identical to `inv_omicron_zone`, except that the foreign +-- keys reference other blueprint tables instead of inventory tables. We expect +-- the two to diverge over time as either inventory or blueprints (or +-- both) grow context-specific properties. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( + -- foreign key into the `blueprint` table + blueprint_id UUID NOT NULL, + + -- unique id for this sled (should be a foreign key into the `sled` table, though + -- it's conceivable a blueprint could refer to a sled that no longer exists, + -- particularly if the blueprint is older than the current target) + sled_id UUID NOT NULL, + + -- unique id for this zone + id UUID NOT NULL, + underlay_address INET NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + + -- SocketAddr of the "primary" service for this zone + -- (what this describes varies by zone type, but all zones have at least one + -- service in them) + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + + -- The remaining properties may be NULL for different kinds of zones. The + -- specific constraints are not enforced at the database layer, basically + -- because it's really complicated to do that and it's not obvious that it's + -- worthwhile. + + -- Some zones have a second service. Like the primary one, the meaning of + -- this is zone-type-dependent. + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + + -- Zones may have an associated dataset. They're currently always on a U.2. + -- The only thing we need to identify it here is the name of the zpool that + -- it's on.
+ dataset_zpool_name TEXT, + + -- Zones with external IPs have an associated NIC and sockaddr for listening + -- (first is a foreign key into `bp_omicron_zone_nic`) + bp_nic_id UUID, + + -- Properties for internal DNS servers + -- address attached to this zone from outside the sled's subnet + dns_gz_address INET, + dns_gz_address_index INT8, + + -- Properties common to both kinds of NTP zones + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + + -- Properties specific to Nexus zones + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + + -- Source NAT configuration (currently used for boundary NTP only) + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (blueprint_id, id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( + blueprint_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); + +-- list of omicron zones that are considered NOT in service for a blueprint +-- +-- In Rust code, we generally want to deal with "zones in service", which means +-- they should appear in DNS. However, almost all zones in almost all blueprints +-- will be in service, so we can incur considerably less database work by +-- storing the zones _not_ in service. Our DB wrapper layer handles this +-- inversion, so the rest of our Rust code can ignore it. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zones_not_in_service ( + blueprint_id UUID NOT NULL, + bp_omicron_zone_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, bp_omicron_zone_id) +); + /*******************************************************************/ /* @@ -3196,7 +3390,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '27.0.0', NULL) + ( TRUE, NOW(), NOW(), '28.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;
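The comment introducing bp_target notes that the current target blueprint is always the maximally-versioned row in that table. A minimal query sketch (illustrative only; the real lookup lives behind nexus-db-queries) that recovers the current target along with its blueprint metadata:

-- Illustrative only: fetch the current target blueprint.
-- The current target's blueprint must not be deleted, so the join cannot
-- drop the newest bp_target row.
SELECT t.version, t.enabled, t.time_made_target,
       b.id, b.parent_blueprint_id, b.creator, b.comment
FROM omicron.public.bp_target t
JOIN omicron.public.blueprint b ON b.id = t.blueprint_id
ORDER BY t.version DESC
LIMIT 1;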
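Similarly, the in-service inversion stored in bp_omicron_zones_not_in_service is undone at read time. A sketch (illustrative only; assumes the blueprint id is bound as $1) of recovering the zones that ARE in service for a given blueprint:

-- Illustrative only: a zone is in service when it has no row in the
-- not-in-service table for the same blueprint.
SELECT z.id, z.sled_id, z.zone_type
FROM omicron.public.bp_omicron_zone z
WHERE z.blueprint_id = $1
  AND NOT EXISTS (
      SELECT 1
      FROM omicron.public.bp_omicron_zones_not_in_service nis
      WHERE nis.blueprint_id = z.blueprint_id
        AND nis.bp_omicron_zone_id = z.id
  );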