diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d79c836fba..1805da8ad8 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@8f354f35e51028c902e8ab954045e37739acf562 # v2 + uses: taiki-e/install-action@f7c663c03b51ed0d93e9cec22a575d3f02175989 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date diff --git a/Cargo.lock b/Cargo.lock index 07f804b03d..6580e1de55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -324,7 +324,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4d45f362125ed144544e57b0ec6de8fd6a296d41a6252fc4a20c0cf12e9ed3a" dependencies = [ - "rustix 0.38.9", + "rustix 0.38.25", "tempfile", "windows-sys 0.48.0", ] @@ -754,9 +754,9 @@ dependencies = [ [[package]] name = "camino-tempfile" -version = "1.0.2" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ab15a83d13f75dbd86f082bdefd160b628476ef58d3b900a0ef74e001bb097" +checksum = "cb905055fa81e4d427f919b2cd0d76a998267de7d225ea767a1894743a5263c2" dependencies = [ "camino", "tempfile", @@ -1671,9 +1671,9 @@ dependencies = [ [[package]] name = "diesel_derives" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e054665eaf6d97d1e7125512bb2d35d07c73ac86cc6920174cb42d1ab697a554" +checksum = "ef8337737574f55a468005a83499da720f20c65586241ffea339db9ecdfd2b44" dependencies = [ "diesel_table_macro_syntax", "proc-macro2", @@ -1791,7 +1791,7 @@ dependencies = [ "omicron-workspace-hack", "openapi-lint", "openapiv3 1.0.3", - "pretty-hex 0.3.0", + "pretty-hex 0.4.0", "schemars", "serde", "serde_json", @@ -2151,7 +2151,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" dependencies = [ "cfg-if 1.0.0", - "rustix 0.38.9", + "rustix 0.38.25", "windows-sys 0.48.0", ] @@ -3072,6 +3072,7 @@ dependencies = [ "bhyve_api", "byteorder", "camino", + "camino-tempfile", "cfg-if 1.0.0", "crucible-smf", "futures", @@ -3383,7 +3384,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.2", - "rustix 0.38.9", + "rustix 0.38.25", "windows-sys 0.48.0", ] @@ -3636,9 +3637,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" [[package]] name = "lock_api" @@ -3992,6 +3993,7 @@ dependencies = [ "sled-agent-client", "steno", "strum", + "thiserror", "uuid", ] @@ -4177,6 +4179,7 @@ dependencies = [ "schemars", "serde", "serde_json", + "serde_with", "steno", "strum", "uuid", @@ -4935,6 +4938,7 @@ dependencies = [ "diesel", "digest", "either", + "errno", "flate2", "futures", "futures-channel", @@ -4979,7 +4983,7 @@ dependencies = [ "regex-syntax 0.8.2", "reqwest", "ring 0.16.20", - "rustix 0.38.9", + "rustix 0.38.25", "schemars", "semver 1.0.20", "serde", @@ -5974,9 +5978,9 @@ checksum = "bc5c99d529f0d30937f6f4b8a86d988047327bb88d04d2c4afc356de74722131" [[package]] name = "pretty-hex" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5" +checksum = "23c6b968ed37d62e35b4febaba13bfa231b0b7929d68b8a94e65445a17e2d35f" [[package]] name = "pretty_assertions" @@ -6421,6 +6425,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -6872,14 +6885,14 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.9" +version = "0.38.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bfe0f2582b4931a45d1fa608f8a8722e8b3c7ac54dd6d5f3b3212791fedef49" +checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" dependencies = [ "bitflags 2.4.0", "errno", "libc", - "linux-raw-sys 0.4.5", + "linux-raw-sys 0.4.11", "windows-sys 0.48.0", ] @@ -8170,14 +8183,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if 1.0.0", "fastrand", - "redox_syscall 0.3.5", - "rustix 0.38.9", + "redox_syscall 0.4.1", + "rustix 0.38.25", "windows-sys 0.48.0", ] @@ -9882,9 +9895,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" dependencies = [ "zeroize_derive", ] diff --git a/Cargo.toml b/Cargo.toml index e4588efbde..694cd2c8dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,7 +160,7 @@ byteorder = "1.5.0" bytes = "1.5.0" bytesize = "1.3.0" camino = "1.1" -camino-tempfile = "1.0.2" +camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" chacha20poly1305 = "0.10.1" ciborium = "0.2.1" @@ -282,13 +282,13 @@ p256 = "0.13" parse-display = "0.8.2" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } paste = "1.0.14" -percent-encoding = "2.3.0" +percent-encoding = "2.3.1" pem = "1.1" petgraph = "0.6.4" postgres-protocol = "0.6.6" predicates = "3.0.4" pretty_assertions = "1.4.0" -pretty-hex = "0.3.0" +pretty-hex = "0.4.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } @@ -389,7 +389,7 @@ walkdir = "2.4" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } wicketd-client = { path = "clients/wicketd-client" } -zeroize = { version = "1.6.0", features = ["zeroize_derive", "std"] } +zeroize = { version = "1.7.0", features = ["zeroize_derive", "std"] } zip = { version = "0.6.6", default-features = false, features = ["deflate","bzip2"] } zone = { version = "0.3", default-features = false, features = ["async"] } diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 497454e047..8296eace5c 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -11,6 +11,7 @@ async-trait.workspace = true bhyve_api.workspace = true byteorder.workspace = true camino.workspace = true +camino-tempfile.workspace = true cfg-if.workspace = true crucible-smf.workspace = true futures.workspace = true diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index ba8cd009e8..ea80a6d34b 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -11,10 +11,12 @@ use crate::opte::{Port, PortTicket}; use crate::svc::wait_for_service; use crate::zone::{AddressRequest, IPADM, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; +use camino_tempfile::Utf8TempDir; use ipnetwork::IpNetwork; use omicron_common::backoff; use slog::{error, info, o, warn, Logger}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::sync::Arc; #[cfg(target_os = "illumos")] use std::sync::OnceLock; #[cfg(target_os = "illumos")] @@ -1043,7 +1045,7 @@ pub struct ServiceProcess { pub log_file: Utf8PathBuf, } -/// Errors returned from [`InstalledZone::install`]. +/// Errors returned from [`ZoneBuilder::install`]. #[derive(thiserror::Error, Debug)] pub enum InstallZoneError { #[error("Cannot create '{zone}': failed to create control VNIC: {err}")] @@ -1063,6 +1065,9 @@ pub enum InstallZoneError { #[error("Failed to find zone image '{image}' from {paths:?}")] ImageNotFound { image: String, paths: Vec }, + + #[error("Attempted to call install() on underspecified ZoneBuilder")] + IncompleteBuilder, } pub struct InstalledZone { @@ -1119,24 +1124,208 @@ impl InstalledZone { &self.zonepath } - // TODO: This would benefit from a "builder-pattern" interface. - #[allow(clippy::too_many_arguments)] - pub async fn install( - log: &Logger, - underlay_vnic_allocator: &VnicAllocator, - zone_root_path: &Utf8Path, - zone_image_paths: &[Utf8PathBuf], - zone_type: &str, - unique_name: Option, - datasets: &[zone::Dataset], - filesystems: &[zone::Fs], - data_links: &[String], - devices: &[zone::Device], - opte_ports: Vec<(Port, PortTicket)>, - bootstrap_vnic: Option, - links: Vec, - limit_priv: Vec, - ) -> Result { + pub fn site_profile_xml_path(&self) -> Utf8PathBuf { + let mut path: Utf8PathBuf = self.zonepath().into(); + path.push("root/var/svc/profile/site.xml"); + path + } +} + +#[derive(Clone)] +pub struct FakeZoneBuilderConfig { + temp_dir: Arc, +} + +#[derive(Clone, Default)] +pub struct ZoneBuilderFactory { + // Why this is part of this builder/factory and not some separate builder + // type: At time of writing, to the best of my knowledge: + // - If we want builder pattern, we need to return some type of `Self`. + // - If we have a trait that returns `Self` type, we can't turn it into a + // trait object (i.e. Box). + // - Plumbing concrete types as generics through every other type that + // needs to construct zones (and anything else with a lot of parameters) + // seems like a worse idea. + fake_cfg: Option, +} + +impl ZoneBuilderFactory { + /// For use in unit tests that don't require actual zone creation to occur. + pub fn fake() -> Self { + Self { + fake_cfg: Some(FakeZoneBuilderConfig { + temp_dir: Arc::new(Utf8TempDir::new().unwrap()), + }), + } + } + + /// Create a [ZoneBuilder] that inherits this factory's fakeness. + pub fn builder<'a>(&self) -> ZoneBuilder<'a> { + ZoneBuilder { fake_cfg: self.fake_cfg.clone(), ..Default::default() } + } +} + +/// Builder-pattern construct for creating an [InstalledZone]. +/// Created by [ZoneBuilderFactory]. +#[derive(Default)] +pub struct ZoneBuilder<'a> { + log: Option, + underlay_vnic_allocator: Option<&'a VnicAllocator>, + zone_root_path: Option<&'a Utf8Path>, + zone_image_paths: Option<&'a [Utf8PathBuf]>, + zone_type: Option<&'a str>, + unique_name: Option, // actually optional + datasets: Option<&'a [zone::Dataset]>, + filesystems: Option<&'a [zone::Fs]>, + data_links: Option<&'a [String]>, + devices: Option<&'a [zone::Device]>, + opte_ports: Option>, + bootstrap_vnic: Option, // actually optional + links: Option>, + limit_priv: Option>, + fake_cfg: Option, +} + +impl<'a> ZoneBuilder<'a> { + pub fn with_log(mut self, log: Logger) -> Self { + self.log = Some(log); + self + } + + pub fn with_underlay_vnic_allocator( + mut self, + vnic_allocator: &'a VnicAllocator, + ) -> Self { + self.underlay_vnic_allocator = Some(vnic_allocator); + self + } + + pub fn with_zone_root_path(mut self, root_path: &'a Utf8Path) -> Self { + self.zone_root_path = Some(root_path); + self + } + + pub fn with_zone_image_paths( + mut self, + image_paths: &'a [Utf8PathBuf], + ) -> Self { + self.zone_image_paths = Some(image_paths); + self + } + + pub fn with_zone_type(mut self, zone_type: &'a str) -> Self { + self.zone_type = Some(zone_type); + self + } + + pub fn with_unique_name(mut self, uuid: Uuid) -> Self { + self.unique_name = Some(uuid); + self + } + + pub fn with_datasets(mut self, datasets: &'a [zone::Dataset]) -> Self { + self.datasets = Some(datasets); + self + } + + pub fn with_filesystems(mut self, filesystems: &'a [zone::Fs]) -> Self { + self.filesystems = Some(filesystems); + self + } + + pub fn with_data_links(mut self, links: &'a [String]) -> Self { + self.data_links = Some(links); + self + } + + pub fn with_devices(mut self, devices: &'a [zone::Device]) -> Self { + self.devices = Some(devices); + self + } + + pub fn with_opte_ports(mut self, ports: Vec<(Port, PortTicket)>) -> Self { + self.opte_ports = Some(ports); + self + } + + pub fn with_bootstrap_vnic(mut self, vnic: Link) -> Self { + self.bootstrap_vnic = Some(vnic); + self + } + + pub fn with_links(mut self, links: Vec) -> Self { + self.links = Some(links); + self + } + + pub fn with_limit_priv(mut self, limit_priv: Vec) -> Self { + self.limit_priv = Some(limit_priv); + self + } + + fn fake_install(self) -> Result { + let zone = self + .zone_type + .ok_or(InstallZoneError::IncompleteBuilder)? + .to_string(); + let control_vnic = self + .underlay_vnic_allocator + .ok_or(InstallZoneError::IncompleteBuilder)? + .new_control(None) + .map_err(move |err| InstallZoneError::CreateVnic { zone, err })?; + let fake_cfg = self.fake_cfg.unwrap(); + let temp_dir = fake_cfg.temp_dir.path().to_path_buf(); + (|| { + let full_zone_name = InstalledZone::get_zone_name( + self.zone_type?, + self.unique_name, + ); + let zonepath = temp_dir + .join(self.zone_root_path?.strip_prefix("/").unwrap()) + .join(&full_zone_name); + let iz = InstalledZone { + log: self.log?, + zonepath, + name: full_zone_name, + control_vnic, + bootstrap_vnic: self.bootstrap_vnic, + opte_ports: self.opte_ports?, + links: self.links?, + }; + let xml_path = iz.site_profile_xml_path().parent()?.to_path_buf(); + std::fs::create_dir_all(&xml_path) + .unwrap_or_else(|_| panic!("ZoneBuilder::fake_install couldn't create site profile xml path {:?}", xml_path)); + Some(iz) + })() + .ok_or(InstallZoneError::IncompleteBuilder) + } + + pub async fn install(self) -> Result { + if self.fake_cfg.is_some() { + return self.fake_install(); + } + + let Self { + log: Some(log), + underlay_vnic_allocator: Some(underlay_vnic_allocator), + zone_root_path: Some(zone_root_path), + zone_image_paths: Some(zone_image_paths), + zone_type: Some(zone_type), + unique_name, + datasets: Some(datasets), + filesystems: Some(filesystems), + data_links: Some(data_links), + devices: Some(devices), + opte_ports: Some(opte_ports), + bootstrap_vnic, + links: Some(links), + limit_priv: Some(limit_priv), + .. + } = self + else { + return Err(InstallZoneError::IncompleteBuilder); + }; + let control_vnic = underlay_vnic_allocator.new_control(None).map_err(|err| { InstallZoneError::CreateVnic { @@ -1145,7 +1334,8 @@ impl InstalledZone { } })?; - let full_zone_name = Self::get_zone_name(zone_type, unique_name); + let full_zone_name = + InstalledZone::get_zone_name(zone_type, unique_name); // Looks for the image within `zone_image_path`, in order. let image = format!("{}.tar.gz", zone_type); @@ -1183,7 +1373,7 @@ impl InstalledZone { net_device_names.dedup(); Zones::install_omicron_zone( - log, + &log, &zone_root_path, &full_zone_name, &zone_image_path, @@ -1210,12 +1400,6 @@ impl InstalledZone { links, }) } - - pub fn site_profile_xml_path(&self) -> Utf8PathBuf { - let mut path: Utf8PathBuf = self.zonepath().into(); - path.push("root/var/svc/profile/site.xml"); - path - } } /// Return true if the service with the given FMRI appears to be an diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index b7514c4806..477ce7d11f 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -26,6 +26,7 @@ serde.workspace = true serde_json.workspace = true steno.workspace = true strum.workspace = true +thiserror.workspace = true uuid.workspace = true db-macros.workspace = true diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index ac5bad26f8..43bf83fd34 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -70,6 +70,7 @@ mod silo_user; mod silo_user_password_hash; mod sled; mod sled_instance; +mod sled_provision_state; mod sled_resource; mod sled_resource_kind; mod sled_underlay_subnet_allocation; @@ -152,6 +153,7 @@ pub use silo_user::*; pub use silo_user_password_hash::*; pub use sled::*; pub use sled_instance::*; +pub use sled_provision_state::*; pub use sled_resource::*; pub use sled_resource_kind::*; pub use sled_underlay_subnet_allocation::*; @@ -287,10 +289,9 @@ macro_rules! impl_enum_type { Ok($model_type::$enum_item) } )* - _ => { - Err(concat!("Unrecognized enum variant for ", - stringify!{$model_type}) - .into()) + other => { + let s = concat!("Unrecognized enum variant for ", stringify!{$model_type}); + Err(format!("{}: (raw bytes: {:?})", s, other).into()) } } } diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs index 2025e79fb8..a1b9e0373a 100644 --- a/nexus/db-model/src/queries/region_allocation.rs +++ b/nexus/db-model/src/queries/region_allocation.rs @@ -23,6 +23,7 @@ // a CTE (where we want the alias name to come first). use crate::schema::dataset; +use crate::schema::sled; use crate::schema::zpool; table! { @@ -157,6 +158,7 @@ diesel::allow_tables_to_appear_in_same_query!( diesel::allow_tables_to_appear_in_same_query!( old_zpool_usage, zpool, + sled, proposed_dataset_changes, ); diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 821cc463b5..3017db6890 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -742,6 +742,7 @@ table! { ip -> Inet, port -> Int4, last_used_address -> Inet, + provision_state -> crate::SledProvisionStateEnum, } } diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 4c82aa5d23..0f6d1b911e 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -4,8 +4,8 @@ use super::{ByteCount, Generation, SqlU16, SqlU32}; use crate::collection::DatastoreCollectionConfig; -use crate::ipv6; use crate::schema::{physical_disk, service, sled, zpool}; +use crate::{ipv6, SledProvisionState}; use chrono::{DateTime, Utc}; use db_macros::Asset; use nexus_types::{external_api::shared, external_api::views, identity::Asset}; @@ -59,6 +59,8 @@ pub struct Sled { /// The last IP address provided to an Oxide service on this sled pub last_used_address: ipv6::Ipv6Addr, + + provision_state: SledProvisionState, } impl Sled { @@ -81,6 +83,10 @@ impl Sled { pub fn serial_number(&self) -> &str { &self.serial_number } + + pub fn provision_state(&self) -> SledProvisionState { + self.provision_state + } } impl From for views::Sled { @@ -93,6 +99,7 @@ impl From for views::Sled { part: sled.part_number, revision: sled.revision, }, + provision_state: sled.provision_state.into(), usable_hardware_threads: sled.usable_hardware_threads.0, usable_physical_ram: *sled.usable_physical_ram, } @@ -188,6 +195,8 @@ impl SledUpdate { serial_number: self.serial_number, part_number: self.part_number, revision: self.revision, + // By default, sleds start as provisionable. + provision_state: SledProvisionState::Provisionable, usable_hardware_threads: self.usable_hardware_threads, usable_physical_ram: self.usable_physical_ram, reservoir_size: self.reservoir_size, diff --git a/nexus/db-model/src/sled_provision_state.rs b/nexus/db-model/src/sled_provision_state.rs new file mode 100644 index 0000000000..6cf81b9c70 --- /dev/null +++ b/nexus/db-model/src/sled_provision_state.rs @@ -0,0 +1,58 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::impl_enum_type; +use nexus_types::external_api::views; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "sled_provision_state"))] + pub struct SledProvisionStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[diesel(sql_type = SledProvisionStateEnum)] + pub enum SledProvisionState; + + // Enum values + Provisionable => b"provisionable" + NonProvisionable => b"non_provisionable" +); + +impl From for views::SledProvisionState { + fn from(state: SledProvisionState) -> Self { + match state { + SledProvisionState::Provisionable => { + views::SledProvisionState::Provisionable + } + SledProvisionState::NonProvisionable => { + views::SledProvisionState::NonProvisionable + } + } + } +} + +impl TryFrom for SledProvisionState { + type Error = UnknownSledProvisionState; + + fn try_from(state: views::SledProvisionState) -> Result { + match state { + views::SledProvisionState::Provisionable => { + Ok(SledProvisionState::Provisionable) + } + views::SledProvisionState::NonProvisionable => { + Ok(SledProvisionState::NonProvisionable) + } + views::SledProvisionState::Unknown => { + Err(UnknownSledProvisionState) + } + } + } +} + +/// An unknown [`views::SledProvisionState`] was encountered. +#[derive(Clone, Debug, Error)] +#[error("Unknown SledProvisionState")] +pub struct UnknownSledProvisionState; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 0612b960c9..44cd7a95b7 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -372,8 +372,8 @@ mod test { BlockSize, ComponentUpdate, ComponentUpdateIdentity, ConsoleSession, Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard, - SledSystemHardware, SledUpdate, SshKey, SystemUpdate, - UpdateableComponentType, VpcSubnet, Zpool, + SledProvisionState, SledSystemHardware, SledUpdate, SshKey, + SystemUpdate, UpdateableComponentType, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use assert_matches::assert_matches; @@ -610,6 +610,35 @@ mod test { sled_id } + // Marks a sled as non-provisionable. + async fn mark_sled_non_provisionable( + datastore: &DataStore, + opctx: &OpContext, + sled_id: Uuid, + ) { + let (authz_sled, sled) = LookupPath::new(opctx, datastore) + .sled_id(sled_id) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + println!("sled: {:?}", sled); + let old_state = datastore + .sled_set_provision_state( + &opctx, + &authz_sled, + SledProvisionState::NonProvisionable, + ) + .await + .unwrap_or_else(|error| { + panic!( + "error marking sled {sled_id} as non-provisionable: {error}" + ) + }); + // The old state should always be provisionable since that's where we + // start. + assert_eq!(old_state, SledProvisionState::Provisionable); + } + fn test_zpool_size() -> ByteCount { ByteCount::from_gibibytes_u32(100) } @@ -770,13 +799,24 @@ mod test { let logctx = dev::test_setup_log("test_region_allocation_strat_random"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - create_test_datasets_for_region_allocation( + let test_datasets = create_test_datasets_for_region_allocation( &opctx, datastore.clone(), + // Even though we're going to mark one sled as non-provisionable to + // test that logic, we aren't forcing the datasets to be on + // distinct sleds, so REGION_REDUNDANCY_THRESHOLD is enough. REGION_REDUNDANCY_THRESHOLD, ) .await; + let non_provisionable_dataset_id = test_datasets[0].dataset_id; + mark_sled_non_provisionable( + &datastore, + &opctx, + test_datasets[0].sled_id, + ) + .await; + // Allocate regions from the datasets for this disk. Do it a few times // for good measure. for alloc_seed in 0..10 { @@ -809,6 +849,9 @@ mod test { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); + // Dataset must not be non-provisionable. + assert_ne!(dataset.id(), non_provisionable_dataset_id); + // Must be 3 unique zpools assert!(disk_zpools.insert(dataset.pool_id)); @@ -837,12 +880,23 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - // Create a rack without enough sleds for a successful allocation when - // we require 3 distinct sleds. + // Create a rack with enough sleds for a successful allocation when we + // require 3 distinct provisionable sleds. let test_datasets = create_test_datasets_for_region_allocation( &opctx, datastore.clone(), - REGION_REDUNDANCY_THRESHOLD, + // We're going to mark one sled as non-provisionable to test that + // logic, and we *are* forcing the datasets to be on distinct + // sleds: hence threshold + 1. + REGION_REDUNDANCY_THRESHOLD + 1, + ) + .await; + + let non_provisionable_dataset_id = test_datasets[0].dataset_id; + mark_sled_non_provisionable( + &datastore, + &opctx, + test_datasets[0].sled_id, ) .await; @@ -884,6 +938,9 @@ mod test { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); + // Dataset must not be non-provisionable. + assert_ne!(dataset.id(), non_provisionable_dataset_id); + // Must be 3 unique zpools assert!(disk_zpools.insert(dataset.pool_id)); @@ -916,11 +973,22 @@ mod test { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a rack without enough sleds for a successful allocation when - // we require 3 distinct sleds. - create_test_datasets_for_region_allocation( + // we require 3 distinct provisionable sleds. + let test_datasets = create_test_datasets_for_region_allocation( &opctx, datastore.clone(), - REGION_REDUNDANCY_THRESHOLD - 1, + // Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1 + // provisionable sleds to test this failure condition. We're going + // to mark one sled as non-provisionable to test that logic, so we + // need to add 1 to that number. + REGION_REDUNDANCY_THRESHOLD, + ) + .await; + + mark_sled_non_provisionable( + &datastore, + &opctx, + test_datasets[0].sled_id, ) .await; diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 130c36b496..406119a636 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -15,6 +15,7 @@ use crate::db::model::Sled; use crate::db::model::SledResource; use crate::db::model::SledUpdate; use crate::db::pagination::paginated; +use crate::db::update_and_check::UpdateAndCheck; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; @@ -153,6 +154,11 @@ impl DataStore { .and(sled_has_space_in_reservoir), ) .filter(sled_dsl::time_deleted.is_null()) + // Filter out sleds that are not provisionable. + .filter( + sled_dsl::provision_state + .eq(db::model::SledProvisionState::Provisionable), + ) .select(sled_dsl::id) .into_boxed(); @@ -217,6 +223,37 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; Ok(()) } + + /// Sets the provision state for this sled. + /// + /// Returns the previous state. + pub async fn sled_set_provision_state( + &self, + opctx: &OpContext, + authz_sled: &authz::Sled, + state: db::model::SledProvisionState, + ) -> Result { + use db::schema::sled::dsl; + + opctx.authorize(authz::Action::Modify, authz_sled).await?; + + let sled_id = authz_sled.id(); + let query = diesel::update(dsl::sled) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(sled_id)) + .filter(dsl::provision_state.ne(state)) + .set(( + dsl::provision_state.eq(state), + dsl::time_modified.eq(Utc::now()), + )) + .check_if_exists::(sled_id); + let result = query + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(result.found.provision_state()) + } } #[cfg(test)] @@ -226,12 +263,15 @@ mod test { use crate::db::datastore::test::{ sled_baseboard_for_test, sled_system_hardware_for_test, }; + use crate::db::lookup::LookupPath; use crate::db::model::ByteCount; use crate::db::model::SqlU32; use nexus_test_utils::db::test_setup_database; + use nexus_types::identity::Asset; use omicron_common::api::external; use omicron_test_utils::dev; use std::net::{Ipv6Addr, SocketAddrV6}; + use std::num::NonZeroU32; fn rack_id() -> Uuid { Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap() @@ -243,19 +283,9 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (_opctx, datastore) = datastore_test(&logctx, &db).await; - let sled_id = Uuid::new_v4(); - let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); - let mut sled_update = SledUpdate::new( - sled_id, - addr, - sled_baseboard_for_test(), - sled_system_hardware_for_test(), - rack_id(), - ); - let observed_sled = datastore - .sled_upsert(sled_update.clone()) - .await - .expect("Could not upsert sled during test prep"); + let mut sled_update = test_new_sled_update(); + let observed_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); assert_eq!( observed_sled.usable_hardware_threads, sled_update.usable_hardware_threads @@ -301,4 +331,119 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + /// Test that new reservations aren't created on non-provisionable sleds. + #[tokio::test] + async fn sled_reservation_create_non_provisionable() { + let logctx = + dev::test_setup_log("sled_reservation_create_non_provisionable"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let sled_update = test_new_sled_update(); + let non_provisionable_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); + + let (authz_sled, _) = LookupPath::new(&opctx, &datastore) + .sled_id(non_provisionable_sled.id()) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + + let old_state = datastore + .sled_set_provision_state( + &opctx, + &authz_sled, + db::model::SledProvisionState::NonProvisionable, + ) + .await + .unwrap(); + assert_eq!( + old_state, + db::model::SledProvisionState::Provisionable, + "a newly created sled starts as provisionable" + ); + + // This should be an error since there are no provisionable sleds. + let resources = db::model::Resources::new( + 1, + // Just require the bare non-zero amount of RAM. + ByteCount::try_from(1024).unwrap(), + ByteCount::try_from(1024).unwrap(), + ); + let constraints = db::model::SledReservationConstraints::none(); + let error = datastore + .sled_reservation_create( + &opctx, + Uuid::new_v4(), + db::model::SledResourceKind::Instance, + resources.clone(), + constraints, + ) + .await + .unwrap_err(); + assert!(matches!(error, external::Error::ServiceUnavailable { .. })); + + // Now add a provisionable sled and try again. + let sled_update = test_new_sled_update(); + let provisionable_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); + + let sleds = datastore + .sled_list(&opctx, &first_page(NonZeroU32::new(10).unwrap())) + .await + .unwrap(); + println!("sleds: {:?}", sleds); + + // Try a few times to ensure that resources never get allocated to the + // non-provisionable sled. + for _ in 0..10 { + let constraints = db::model::SledReservationConstraints::none(); + let resource = datastore + .sled_reservation_create( + &opctx, + Uuid::new_v4(), + db::model::SledResourceKind::Instance, + resources.clone(), + constraints, + ) + .await + .unwrap(); + assert_eq!( + resource.sled_id, + provisionable_sled.id(), + "resource is always allocated to the provisionable sled" + ); + + datastore + .sled_reservation_delete(&opctx, resource.id) + .await + .unwrap(); + } + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + fn test_new_sled_update() -> SledUpdate { + let sled_id = Uuid::new_v4(); + let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); + SledUpdate::new( + sled_id, + addr, + sled_baseboard_for_test(), + sled_system_hardware_for_test(), + rack_id(), + ) + } + + /// Returns pagination parameters to fetch the first page of results for a + /// paginated endpoint + fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { + DataPageParams { + marker: None, + direction: dropshot::PaginationOrder::Ascending, + limit, + } + } } diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index a080af4c37..031be92c08 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -290,6 +290,7 @@ impl CandidateZpools { seed: u128, distinct_sleds: bool, ) -> Self { + use schema::sled::dsl as sled_dsl; use schema::zpool::dsl as zpool_dsl; // Why are we using raw `diesel::dsl::sql` here? @@ -310,13 +311,20 @@ impl CandidateZpools { + diesel::dsl::sql(&zpool_size_delta.to_string())) .le(diesel::dsl::sql(zpool_dsl::total_size::NAME)); + // We need to join on the sled table to access provision_state. + let with_sled = sled_dsl::sled.on(zpool_dsl::sled_id.eq(sled_dsl::id)); let with_zpool = zpool_dsl::zpool - .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)); + .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)) + .inner_join(with_sled); + + let sled_is_provisionable = sled_dsl::provision_state + .eq(crate::db::model::SledProvisionState::Provisionable); let base_query = old_zpool_usage .query_source() .inner_join(with_zpool) .filter(it_will_fit) + .filter(sled_is_provisionable) .select((old_zpool_usage::dsl::pool_id,)); let query = if distinct_sleds { diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index c2931f1441..44efc2934e 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -8,6 +8,7 @@ use crate::internal_api::params::{ PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, SledAgentStartupInfo, SledRole, ZpoolPutRequest, }; +use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; @@ -142,6 +143,20 @@ impl super::Nexus { .await } + /// Returns the old state. + pub(crate) async fn sled_set_provision_state( + &self, + opctx: &OpContext, + sled_lookup: &lookup::Sled<'_>, + state: db::model::SledProvisionState, + ) -> Result { + let (authz_sled,) = + sled_lookup.lookup_for(authz::Action::Modify).await?; + self.db_datastore + .sled_set_provision_state(opctx, &authz_sled, state) + .await + } + // Physical disks pub(crate) async fn sled_list_physical_disks( diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 78f675c28a..f1302f4a73 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -218,6 +218,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(rack_view)?; api.register(sled_list)?; api.register(sled_view)?; + api.register(sled_set_provision_state)?; api.register(sled_instance_list)?; api.register(sled_physical_disk_list)?; api.register(physical_disk_list)?; @@ -4483,6 +4484,47 @@ async fn sled_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Set the sled's provision state. +#[endpoint { + method = PUT, + path = "/v1/system/hardware/sleds/{sled_id}/provision-state", + tags = ["system/hardware"], +}] +async fn sled_set_provision_state( + rqctx: RequestContext>, + path_params: Path, + new_provision_state: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let path = path_params.into_inner(); + let provision_state = new_provision_state.into_inner().state; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + // Convert the external `SledProvisionState` into our internal data model. + let new_state = + db::model::SledProvisionState::try_from(provision_state).map_err( + |error| HttpError::for_bad_request(None, format!("{error}")), + )?; + + let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; + + let old_state = nexus + .sled_set_provision_state(&opctx, &sled_lookup, new_state) + .await?; + + let response = params::SledProvisionStateResponse { + old_state: old_state.into(), + new_state: new_state.into(), + }; + + Ok(HttpResponseOk(response)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// List instances running on a given sled #[endpoint { method = GET, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 5dfdcc151d..536b96f7ae 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -50,6 +50,12 @@ lazy_static! { format!("/v1/system/hardware/uninitialized-sleds"); pub static ref HARDWARE_SLED_URL: String = format!("/v1/system/hardware/sleds/{}", SLED_AGENT_UUID); + pub static ref HARDWARE_SLED_PROVISION_STATE_URL: String = + format!("/v1/system/hardware/sleds/{}/provision-state", SLED_AGENT_UUID); + pub static ref DEMO_SLED_PROVISION_STATE: params::SledProvisionStateParams = + params::SledProvisionStateParams { + state: nexus_types::external_api::views::SledProvisionState::NonProvisionable, + }; pub static ref HARDWARE_SWITCH_URL: String = format!("/v1/system/hardware/switches/{}", SWITCH_UUID); pub static ref HARDWARE_DISK_URL: String = @@ -1609,6 +1615,15 @@ lazy_static! { allowed_methods: vec![AllowedMethod::Get], }, + VerifyEndpoint { + url: &HARDWARE_SLED_PROVISION_STATE_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![AllowedMethod::Put( + serde_json::to_value(&*DEMO_SLED_PROVISION_STATE).unwrap() + )], + }, + VerifyEndpoint { url: "/v1/system/hardware/switches", visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 213e7f9e4f..6feafe415d 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -629,7 +629,17 @@ impl InformationSchema { self.referential_constraints, other.referential_constraints ); - similar_asserts::assert_eq!(self.statistics, other.statistics); + similar_asserts::assert_eq!( + self.statistics, + other.statistics, + "Statistics did not match. This often means that in dbinit.sql, a new \ + column was added into the middle of a table rather than to the end. \ + If that is the case:\n\n \ + \ + * Change dbinit.sql to add the column to the end of the table.\n\ + * Update nexus/db-model/src/schema.rs and the corresponding \ + Queryable/Insertable struct with the new column ordering." + ); similar_asserts::assert_eq!(self.sequences, other.sequences); similar_asserts::assert_eq!(self.pg_indexes, other.pg_indexes); } diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index dd387ab979..7e57d00df2 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -120,6 +120,7 @@ rack_view GET /v1/system/hardware/racks/{rac sled_instance_list GET /v1/system/hardware/sleds/{sled_id}/instances sled_list GET /v1/system/hardware/sleds sled_physical_disk_list GET /v1/system/hardware/sleds/{sled_id}/disks +sled_set_provision_state PUT /v1/system/hardware/sleds/{sled_id}/provision-state sled_view GET /v1/system/hardware/sleds/{sled_id} switch_list GET /v1/system/hardware/switches switch_view GET /v1/system/hardware/switches/{switch_id} diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 9cb94a8484..8cbbd8626c 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -14,6 +14,7 @@ parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } serde.workspace = true serde_json.workspace = true +serde_with.workspace = true steno.workspace = true strum.workspace = true uuid.workspace = true diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 9b11b0f524..3303d38367 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -75,6 +75,23 @@ pub struct SledSelector { pub sled: Uuid, } +/// Parameters for `sled_set_provision_state`. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct SledProvisionStateParams { + /// The provision state. + pub state: super::views::SledProvisionState, +} + +/// Response to `sled_set_provision_state`. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct SledProvisionStateResponse { + /// The old provision state. + pub old_state: super::views::SledProvisionState, + + /// The new provision state. + pub new_state: super::views::SledProvisionState, +} + pub struct SwitchSelector { /// ID of the switch pub switch: Uuid, diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 9dfe36d63b..6d02623f34 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -17,6 +17,7 @@ use omicron_common::api::external::{ }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use serde_with::rust::deserialize_ignore_any; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::net::IpAddr; @@ -286,12 +287,38 @@ pub struct Sled { pub baseboard: Baseboard, /// The rack to which this Sled is currently attached pub rack_id: Uuid, + /// The provision state of the sled. + pub provision_state: SledProvisionState, /// The number of hardware threads which can execute on this sled pub usable_hardware_threads: u32, /// Amount of RAM which may be used by the Sled's OS pub usable_physical_ram: ByteCount, } +/// The provision state of a sled. +/// +/// This controls whether new resources are going to be provisioned on this +/// sled. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] +#[serde(rename_all = "snake_case")] +pub enum SledProvisionState { + /// New resources will be provisioned on this sled. + Provisionable, + + /// New resources will not be provisioned on this sled. However, existing + /// resources will continue to be on this sled unless manually migrated + /// off. + NonProvisionable, + + /// This is a state that isn't known yet. + /// + /// This is defined to avoid API breakage. + #[serde(other, deserialize_with = "deserialize_ignore_any")] + Unknown, +} + /// An operator's view of an instance running on a given sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct SledInstance { diff --git a/openapi/nexus.json b/openapi/nexus.json index fa0e22f579..15e75f93ff 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -3817,6 +3817,55 @@ } } }, + "/v1/system/hardware/sleds/{sled_id}/provision-state": { + "put": { + "tags": [ + "system/hardware" + ], + "summary": "Set the sled's provision state.", + "operationId": "sled_set_provision_state", + "parameters": [ + { + "in": "path", + "name": "sled_id", + "description": "ID of the sled", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledProvisionStateParams" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledProvisionStateResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/hardware/switch-port": { "get": { "tags": [ @@ -12995,6 +13044,14 @@ "type": "string", "format": "uuid" }, + "provision_state": { + "description": "The provision state of the sled.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + }, "rack_id": { "description": "The rack to which this Sled is currently attached", "type": "string", @@ -13028,6 +13085,7 @@ "required": [ "baseboard", "id", + "provision_state", "rack_id", "time_created", "time_modified", @@ -13118,6 +13176,75 @@ "items" ] }, + "SledProvisionState": { + "description": "The provision state of a sled.\n\nThis controls whether new resources are going to be provisioned on this sled.", + "oneOf": [ + { + "description": "New resources will be provisioned on this sled.", + "type": "string", + "enum": [ + "provisionable" + ] + }, + { + "description": "New resources will not be provisioned on this sled. However, existing resources will continue to be on this sled unless manually migrated off.", + "type": "string", + "enum": [ + "non_provisionable" + ] + }, + { + "description": "This is a state that isn't known yet.\n\nThis is defined to avoid API breakage.", + "type": "string", + "enum": [ + "unknown" + ] + } + ] + }, + "SledProvisionStateParams": { + "description": "Parameters for `sled_set_provision_state`.", + "type": "object", + "properties": { + "state": { + "description": "The provision state.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + } + }, + "required": [ + "state" + ] + }, + "SledProvisionStateResponse": { + "description": "Response to `sled_set_provision_state`.", + "type": "object", + "properties": { + "new_state": { + "description": "The new provision state.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + }, + "old_state": { + "description": "The old provision state.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionState" + } + ] + } + }, + "required": [ + "new_state", + "old_state" + ] + }, "SledResultsPage": { "description": "A single page of results", "type": "object", diff --git a/package-manifest.toml b/package-manifest.toml index ca96341f2a..26c45f0ff7 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -425,7 +425,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" +source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt source.sha256 = "38851c79c85d53e997db748520fb27c82299ce7e58a550e35646a548498f1271" @@ -441,7 +441,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" +source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt source.sha256 = "8cd94e9a6f6175081ce78f0281085a08a5306cde453d8e21deb28050945b1d88" @@ -456,10 +456,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" +source.commit = "579592bf474ec4b86805ada60c1b920b3beef5a7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "c4a7a626c84a28de3d2c6bfd85592bda2abad8cf5b41b2ce90b9c03904ccd3df" +source.sha256 = "82aa1ca1d7701b2221c442d58f912be59798258d574effcb866ffab22753cf38" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/15.0.0/up1.sql b/schema/crdb/15.0.0/up1.sql new file mode 100644 index 0000000000..04baa76370 --- /dev/null +++ b/schema/crdb/15.0.0/up1.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS omicron.public.sled_provision_state AS ENUM ( + -- New resources can be provisioned onto the sled + 'provisionable', + -- New resources must not be provisioned onto the sled + 'non_provisionable' +); diff --git a/schema/crdb/15.0.0/up2.sql b/schema/crdb/15.0.0/up2.sql new file mode 100644 index 0000000000..e3ea2ba11c --- /dev/null +++ b/schema/crdb/15.0.0/up2.sql @@ -0,0 +1,3 @@ +ALTER TABLE omicron.public.sled + ADD COLUMN IF NOT EXISTS provision_state omicron.public.sled_provision_state + NOT NULL DEFAULT 'provisionable'; diff --git a/schema/crdb/15.0.0/up3.sql b/schema/crdb/15.0.0/up3.sql new file mode 100644 index 0000000000..aaa3feac20 --- /dev/null +++ b/schema/crdb/15.0.0/up3.sql @@ -0,0 +1,5 @@ +-- Drop the default column value for provision_state -- it should always be set +-- by Nexus. +ALTER TABLE omicron.public.sled + ALTER COLUMN provision_state + DROP DEFAULT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 6019e27782..718e443669 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -73,6 +73,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.rack ( * Sleds */ +CREATE TYPE IF NOT EXISTS omicron.public.sled_provision_state AS ENUM ( + -- New resources can be provisioned onto the sled + 'provisionable', + -- New resources must not be provisioned onto the sled + 'non_provisionable' +); + CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* Identity metadata (asset) */ id UUID PRIMARY KEY, @@ -104,6 +111,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* The last address allocated to an Oxide service on this sled. */ last_used_address INET NOT NULL, + /* The state of whether resources should be provisioned onto the sled */ + provision_state omicron.public.sled_provision_state NOT NULL, + -- This constraint should be upheld, even for deleted disks -- in the fleet. CONSTRAINT serial_part_revision_unique UNIQUE ( diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index a6f022f5f2..c37f0ffde6 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -26,7 +26,7 @@ use futures::lock::{Mutex, MutexGuard}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::{DhcpCfg, PortManager}; -use illumos_utils::running_zone::{InstalledZone, RunningZone}; +use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::Zones; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; @@ -226,6 +226,9 @@ struct InstanceInner { // Storage resources storage: StorageHandle, + // Used to create propolis zones + zone_builder_factory: ZoneBuilderFactory, + // Object used to collect zone bundles from this instance when terminated. zone_bundler: ZoneBundler, @@ -611,6 +614,7 @@ impl Instance { port_manager, storage, zone_bundler, + zone_builder_factory, } = services; let mut dhcp_config = DhcpCfg { @@ -678,6 +682,7 @@ impl Instance { running_state: None, nexus_client, storage, + zone_builder_factory, zone_bundler, instance_ticket: ticket, }; @@ -904,31 +909,28 @@ impl Instance { .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)? .clone(); - let installed_zone = InstalledZone::install( - &inner.log, - &inner.vnic_allocator, - &root, - &["/opt/oxide".into()], - "propolis-server", - Some(*inner.propolis_id()), - // dataset= - &[], - // filesystems= - &[], - // data_links= - &[], - &[ + let installed_zone = inner + .zone_builder_factory + .builder() + .with_log(inner.log.clone()) + .with_underlay_vnic_allocator(&inner.vnic_allocator) + .with_zone_root_path(&root) + .with_zone_image_paths(&["/opt/oxide".into()]) + .with_zone_type("propolis-server") + .with_unique_name(*inner.propolis_id()) + .with_datasets(&[]) + .with_filesystems(&[]) + .with_data_links(&[]) + .with_devices(&[ zone::Device { name: "/dev/vmm/*".to_string() }, zone::Device { name: "/dev/vmmctl".to_string() }, zone::Device { name: "/dev/viona".to_string() }, - ], - opte_ports, - // physical_nic= - None, - vec![], - vec![], - ) - .await?; + ]) + .with_opte_ports(opte_ports) + .with_links(vec![]) + .with_limit_priv(vec![]) + .install() + .await?; let gateway = inner.port_manager.underlay_ip(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index fa40a876f0..c1b7e402a4 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -17,6 +17,7 @@ use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; +use illumos_utils::running_zone::ZoneBuilderFactory; use illumos_utils::vmm_reservoir; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; @@ -76,6 +77,7 @@ struct InstanceManagerInternal { port_manager: PortManager, storage: StorageHandle, zone_bundler: ZoneBundler, + zone_builder_factory: ZoneBuilderFactory, } pub(crate) struct InstanceManagerServices { @@ -84,6 +86,7 @@ pub(crate) struct InstanceManagerServices { pub port_manager: PortManager, pub storage: StorageHandle, pub zone_bundler: ZoneBundler, + pub zone_builder_factory: ZoneBuilderFactory, } /// All instances currently running on the sled. @@ -100,6 +103,7 @@ impl InstanceManager { port_manager: PortManager, storage: StorageHandle, zone_bundler: ZoneBundler, + zone_builder_factory: ZoneBuilderFactory, ) -> Result { Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { @@ -113,6 +117,7 @@ impl InstanceManager { port_manager, storage, zone_bundler, + zone_builder_factory, }), }) } @@ -266,6 +271,10 @@ impl InstanceManager { port_manager: self.inner.port_manager.clone(), storage: self.inner.storage.clone(), zone_bundler: self.inner.zone_bundler.clone(), + zone_builder_factory: self + .inner + .zone_builder_factory + .clone(), }; let state = crate::instance::InstanceInitialState { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b87c91768b..2caa640e22 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -53,7 +53,7 @@ use illumos_utils::dladm::{ use illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{DhcpCfg, Port, PortManager, PortTicket}; use illumos_utils::running_zone::{ - InstalledZone, RunCommandError, RunningZone, + InstalledZone, RunCommandError, RunningZone, ZoneBuilderFactory, }; use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT; use illumos_utils::zone::AddressRequest; @@ -1103,23 +1103,28 @@ impl ServiceManager { .push(boot_zpool.dataset_mountpoint(INSTALL_DATASET)); } - let installed_zone = InstalledZone::install( - &self.inner.log, - &self.inner.underlay_vnic_allocator, - &request.root, - zone_image_paths.as_slice(), - &request.zone.zone_type.to_string(), - unique_name, - datasets.as_slice(), - &filesystems, - &data_links, - &devices, - opte_ports, - bootstrap_vnic, - links, - limit_priv, - ) - .await?; + let mut zone_builder = ZoneBuilderFactory::default().builder(); + if let Some(uuid) = unique_name { + zone_builder = zone_builder.with_unique_name(uuid); + } + if let Some(vnic) = bootstrap_vnic { + zone_builder = zone_builder.with_bootstrap_vnic(vnic); + } + let installed_zone = zone_builder + .with_log(self.inner.log.clone()) + .with_underlay_vnic_allocator(&self.inner.underlay_vnic_allocator) + .with_zone_root_path(&request.root) + .with_zone_image_paths(zone_image_paths.as_slice()) + .with_zone_type(&request.zone.zone_type.to_string()) + .with_datasets(datasets.as_slice()) + .with_filesystems(&filesystems) + .with_data_links(&data_links) + .with_devices(&devices) + .with_opte_ports(opte_ports) + .with_links(links) + .with_limit_priv(limit_priv) + .install() + .await?; // TODO(https://github.com/oxidecomputer/omicron/issues/1898): // diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index cfa8c5d7ca..f5b71106cd 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -68,6 +68,7 @@ use std::sync::Arc; use tokio::sync::oneshot; use uuid::Uuid; +use illumos_utils::running_zone::ZoneBuilderFactory; #[cfg(not(test))] use illumos_utils::{dladm::Dladm, zone::Zones}; #[cfg(test)] @@ -382,6 +383,7 @@ impl SledAgent { port_manager.clone(), storage_manager.clone(), long_running_task_handles.zone_bundler.clone(), + ZoneBuilderFactory::default(), )?; // Configure the VMM reservoir as either a percentage of DRAM or as an diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 76bdb9ca92..f60ea76380 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="12b392be94ff93abc3017bf2610a3b18e2174a2d" +COMMIT="579592bf474ec4b86805ada60c1b920b3beef5a7" SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index d6d1788cbc..649db53f6e 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="12b392be94ff93abc3017bf2610a3b18e2174a2d" +COMMIT="579592bf474ec4b86805ada60c1b920b3beef5a7" SHA2="6c1fab8d5028b52a161d8bf02aae47844699cdc5f7b28e1ac519fc4ec1ab3971" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 9657147159..08b04d6b67 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="c4a7a626c84a28de3d2c6bfd85592bda2abad8cf5b41b2ce90b9c03904ccd3df" +CIDL_SHA256="82aa1ca1d7701b2221c442d58f912be59798258d574effcb866ffab22753cf38" MGD_LINUX_SHA256="81231b30872fa1c581aa22c101f32d11f33f335758ac1fd2653436fbc7aab93f" \ No newline at end of file diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1a289bd0cb..fe7c3bdc81 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -105,7 +105,7 @@ unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } uuid = { version = "1.6.1", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zeroize = { version = "1.6.0", features = ["std", "zeroize_derive"] } +zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } [build-dependencies] @@ -201,7 +201,7 @@ unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } uuid = { version = "1.6.1", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zeroize = { version = "1.6.0", features = ["std", "zeroize_derive"] } +zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } [target.x86_64-unknown-linux-gnu.dependencies] @@ -209,58 +209,64 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] }