diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs
index b14cf5a96f..be19659c69 100644
--- a/clients/sled-agent-client/src/lib.rs
+++ b/clients/sled-agent-client/src/lib.rs
@@ -43,6 +43,7 @@ progenitor::generate_api!(
     replace = {
         Baseboard = nexus_sled_agent_shared::inventory::Baseboard,
         ByteCount = omicron_common::api::external::ByteCount,
+        DatasetKind = omicron_common::api::internal::shared::DatasetKind,
         DiskIdentity = omicron_common::disk::DiskIdentity,
         DiskVariant = omicron_common::disk::DiskVariant,
         Generation = omicron_common::api::external::Generation,
diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs
index 5945efe16d..4826292863 100644
--- a/common/src/api/internal/shared.rs
+++ b/common/src/api/internal/shared.rs
@@ -10,13 +10,14 @@ use crate::{
 };
 use oxnet::{IpNet, Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
+use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
 use std::{
     collections::{HashMap, HashSet},
     fmt,
     net::{IpAddr, Ipv4Addr, Ipv6Addr},
     str::FromStr,
 };
+use strum::EnumCount;
 use uuid::Uuid;
 
 use super::nexus::HostIdentifier;
@@ -837,13 +838,11 @@ pub struct ResolvedVpcRouteSet {
 }
 
 /// Describes the purpose of the dataset.
-#[derive(
-    Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq,
-)]
-#[serde(rename_all = "snake_case")]
+#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, EnumCount)]
 pub enum DatasetKind {
-    Crucible,
+    // Durable datasets for zones
     Cockroach,
+    Crucible,
     /// Used for single-node clickhouse deployments
     Clickhouse,
     /// Used for replicated clickhouse deployments
@@ -852,24 +851,153 @@ pub enum DatasetKind {
     ClickhouseServer,
     ExternalDns,
     InternalDns,
+
+    // Zone filesystems
+    ZoneRoot,
+    Zone {
+        name: String,
+    },
+
+    // Other datasets
+    Debug,
+}
+
+impl Serialize for DatasetKind {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_str(&self.to_string())
+    }
+}
+
+impl<'de> Deserialize<'de> for DatasetKind {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        s.parse().map_err(de::Error::custom)
+    }
+}
+
+impl JsonSchema for DatasetKind {
+    fn schema_name() -> String {
+        "DatasetKind".to_string()
+    }
+
+    fn json_schema(
+        gen: &mut schemars::gen::SchemaGenerator,
+    ) -> schemars::schema::Schema {
+        // The schema is a bit more complicated than this -- it's either one of
+        // the fixed values or a string starting with "zone/" -- but this is
+        // good enough for now.
+        let mut schema = <String>::json_schema(gen).into_object();
+        schema.metadata().description = Some(
+            "The kind of dataset. See the `DatasetKind` enum \
+             in omicron-common for possible values."
+                .to_owned(),
+        );
+        schema.into()
+    }
+}
+
+impl DatasetKind {
+    pub fn dataset_should_be_encrypted(&self) -> bool {
+        match self {
+            // We encrypt all datasets except Crucible.
+            //
+            // Crucible already performs encryption internally, and we
+            // avoid double-encryption.
+            DatasetKind::Crucible => false,
+            _ => true,
+        }
+    }
+
+    /// Returns true if this dataset is delegated to a non-global zone.
+    pub fn zoned(&self) -> bool {
+        use DatasetKind::*;
+        match self {
+            Cockroach | Crucible | Clickhouse | ClickhouseKeeper
+            | ClickhouseServer | ExternalDns | InternalDns => true,
+            ZoneRoot | Zone { .. } | Debug => false,
+        }
+    }
+
+    /// Returns the zone name, if this is a dataset for a zone filesystem.
+    ///
+    /// Otherwise, returns "None".
+    pub fn zone_name(&self) -> Option<&str> {
+        if let DatasetKind::Zone { name } = self {
+            Some(name)
+        } else {
+            None
+        }
+    }
+}
+
+// Be cautious updating this implementation:
+//
+// - It should align with [DatasetKind::FromStr], below
+// - The strings used here comprise the dataset name, stored durably
+//   on-disk
 impl fmt::Display for DatasetKind {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         use DatasetKind::*;
         let s = match self {
             Crucible => "crucible",
-            Cockroach => "cockroach",
+            Cockroach => "cockroachdb",
             Clickhouse => "clickhouse",
             ClickhouseKeeper => "clickhouse_keeper",
             ClickhouseServer => "clickhouse_server",
             ExternalDns => "external_dns",
             InternalDns => "internal_dns",
+            ZoneRoot => "zone",
+            Zone { name } => {
+                write!(f, "zone/{}", name)?;
+                return Ok(());
+            }
+            Debug => "debug",
         };
         write!(f, "{}", s)
     }
 }
 
+#[derive(Debug, thiserror::Error)]
+pub enum DatasetKindParseError {
+    #[error("Dataset unknown: {0}")]
+    UnknownDataset(String),
+}
+
+impl FromStr for DatasetKind {
+    type Err = DatasetKindParseError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        use DatasetKind::*;
+        let kind = match s {
+            "cockroachdb" => Cockroach,
+            "crucible" => Crucible,
+            "clickhouse" => Clickhouse,
+            "clickhouse_keeper" => ClickhouseKeeper,
+            "clickhouse_server" => ClickhouseServer,
+            "external_dns" => ExternalDns,
+            "internal_dns" => InternalDns,
+            "zone" => ZoneRoot,
+            "debug" => Debug,
+            other => {
+                if let Some(name) = other.strip_prefix("zone/") {
+                    Zone { name: name.to_string() }
+                } else {
+                    return Err(DatasetKindParseError::UnknownDataset(
+                        s.to_string(),
+                    ));
+                }
+            }
+        };
+        Ok(kind)
+    }
+}
+
 /// Identifiers for a single sled.
 ///
 /// This is intended primarily to be used in timeseries, to identify
@@ -892,6 +1020,7 @@ pub struct SledIdentifiers {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
     use crate::api::internal::shared::AllowedSourceIps;
     use oxnet::{IpNet, Ipv4Net, Ipv6Net};
     use std::net::{Ipv4Addr, Ipv6Addr};
@@ -936,4 +1065,49 @@ mod tests {
             serde_json::from_str(r#"{"allow":"any"}"#).unwrap(),
         );
     }
+
+    #[test]
+    fn test_dataset_kind_serialization() {
+        let kinds = [
+            DatasetKind::Cockroach,
+            DatasetKind::Crucible,
+            DatasetKind::Clickhouse,
+            DatasetKind::ClickhouseKeeper,
+            DatasetKind::ClickhouseServer,
+            DatasetKind::ExternalDns,
+            DatasetKind::InternalDns,
+            DatasetKind::ZoneRoot,
+            DatasetKind::Zone { name: String::from("myzone") },
+            DatasetKind::Debug,
+        ];
+
+        assert_eq!(kinds.len(), DatasetKind::COUNT);
+
+        for kind in &kinds {
+            // To string, from string
+            let as_str = kind.to_string();
+            let from_str =
+                DatasetKind::from_str(&as_str).unwrap_or_else(|_| {
+                    panic!("Failed to convert {kind} to and from string")
+                });
+            assert_eq!(
+                *kind, from_str,
+                "{kind} failed to convert to/from a string"
+            );
+
+            // Serialize, deserialize
+            let ser = serde_json::to_string(&kind)
+                .unwrap_or_else(|_| panic!("Failed to serialize {kind}"));
+            let de: DatasetKind = serde_json::from_str(&ser)
+                .unwrap_or_else(|_| panic!("Failed to deserialize {kind}"));
+            assert_eq!(*kind, de, "{kind} failed serialization");
+
+            // Test that serialization is equivalent to stringifying.
+            assert_eq!(
+                format!("\"{as_str}\""),
+                ser,
+                "{kind} does not match stringification/serialization"
+            );
+        }
+    }
 }
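For a concrete sense of the round-trip contract the test above exercises, here is a minimal standalone sketch; the zone name `oxz_switch` is made up:

```rust
use std::str::FromStr;

use omicron_common::api::internal::shared::DatasetKind;

fn main() {
    // Fixed kinds render as bare keywords. Note that Cockroach
    // intentionally stringifies as "cockroachdb", matching the dataset
    // names already stored on-disk.
    assert_eq!(DatasetKind::Cockroach.to_string(), "cockroachdb");

    // Zone filesystem datasets embed their zone name after a "zone/"
    // prefix, which FromStr peels back off.
    let kind = DatasetKind::Zone { name: String::from("oxz_switch") };
    assert_eq!(kind.to_string(), "zone/oxz_switch");
    assert_eq!(DatasetKind::from_str("zone/oxz_switch").unwrap(), kind);

    // Serde routes through Display/FromStr, so the JSON form is a plain
    // string either way -- which is why the JsonSchema impl above reports
    // "type": "string".
    assert_eq!(serde_json::to_string(&kind).unwrap(), "\"zone/oxz_switch\"");
}
```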
diff --git a/common/src/disk.rs b/common/src/disk.rs
index d8b4c2e0a1..ed0bf8666e 100644
--- a/common/src/disk.rs
+++ b/common/src/disk.rs
@@ -4,18 +4,23 @@
 //! Disk related types shared among crates
 
-use std::fmt;
-
 use anyhow::bail;
+use omicron_uuid_kinds::DatasetUuid;
 use omicron_uuid_kinds::ZpoolUuid;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
+use std::collections::BTreeMap;
+use std::fmt;
 use uuid::Uuid;
 
 use crate::{
-    api::external::Generation, ledger::Ledgerable, zpool_name::ZpoolKind,
+    api::external::Generation,
+    ledger::Ledgerable,
+    zpool_name::{ZpoolKind, ZpoolName},
 };
 
+pub use crate::api::internal::shared::DatasetKind;
+
 #[derive(
     Clone,
     Debug,
@@ -72,6 +77,243 @@ impl OmicronPhysicalDisksConfig {
     }
 }
 
+#[derive(
+    Debug,
+    PartialEq,
+    Eq,
+    Hash,
+    Serialize,
+    Deserialize,
+    Clone,
+    JsonSchema,
+    PartialOrd,
+    Ord,
+)]
+pub struct DatasetName {
+    // A unique identifier for the Zpool on which the dataset is stored.
+    pool_name: ZpoolName,
+    // A name for the dataset within the Zpool.
+    kind: DatasetKind,
+}
+
+impl DatasetName {
+    pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self {
+        Self { pool_name, kind }
+    }
+
+    pub fn pool(&self) -> &ZpoolName {
+        &self.pool_name
+    }
+
+    pub fn dataset(&self) -> &DatasetKind {
+        &self.kind
+    }
+
+    /// Returns the full name of the dataset, as would be returned from
+    /// "zfs get" or "zfs list".
+    ///
+    /// If this dataset should be encrypted, this automatically adds the
+    /// "crypt" dataset component.
+    pub fn full_name(&self) -> String {
+        // Currently, we encrypt all datasets except Crucible.
+        //
+        // Crucible already performs encryption internally, and we
+        // avoid double-encryption.
+        if self.kind.dataset_should_be_encrypted() {
+            self.full_encrypted_name()
+        } else {
+            self.full_unencrypted_name()
+        }
+    }
+
+    fn full_encrypted_name(&self) -> String {
+        format!("{}/crypt/{}", self.pool_name, self.kind)
+    }
+
+    fn full_unencrypted_name(&self) -> String {
+        format!("{}/{}", self.pool_name, self.kind)
+    }
+}
+
+#[derive(
+    Copy,
+    Clone,
+    Debug,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+)]
+pub struct GzipLevel(u8);
+
+// Fastest compression level
+const GZIP_LEVEL_MIN: u8 = 1;
+
+// Best compression ratio
+const GZIP_LEVEL_MAX: u8 = 9;
+
+impl GzipLevel {
+    pub const fn new<const N: u8>() -> Self {
+        assert!(N >= GZIP_LEVEL_MIN, "Compression level too small");
+        assert!(N <= GZIP_LEVEL_MAX, "Compression level too large");
+        Self(N)
+    }
+}
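A usage sketch for the const-generic constructor above; because the assertions run during constant evaluation, an out-of-range level becomes a build failure rather than a runtime panic:

```rust
use omicron_common::disk::{CompressionAlgorithm, GzipLevel};

// Evaluated at compile time: GzipLevel::new::<0>() or ::<10>() here
// would abort the build with the corresponding assertion message.
const LEVEL: GzipLevel = GzipLevel::new::<6>();

fn main() {
    let compression = CompressionAlgorithm::GzipN { level: LEVEL };
    // Display (defined below) renders this in the form zfs(8) accepts.
    assert_eq!(compression.to_string(), "gzip-6");
}
```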
+#[derive(
+    Copy,
+    Clone,
+    Debug,
+    Default,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum CompressionAlgorithm {
+    // Selects a default compression algorithm. This is dependent on both the
+    // zpool and OS version.
+    On,
+
+    // Disables compression.
+    #[default]
+    Off,
+
+    // Selects the default Gzip compression level.
+    //
+    // According to the ZFS docs, this is "gzip-6", but that's a default value,
+    // which may change with OS updates.
+    Gzip,
+
+    GzipN {
+        level: GzipLevel,
+    },
+    Lz4,
+    Lzjb,
+    Zle,
+}
+
+impl fmt::Display for CompressionAlgorithm {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use CompressionAlgorithm::*;
+        let s = match self {
+            On => "on",
+            Off => "off",
+            Gzip => "gzip",
+            GzipN { level } => {
+                return write!(f, "gzip-{}", level.0);
+            }
+            Lz4 => "lz4",
+            Lzjb => "lzjb",
+            Zle => "zle",
+        };
+        write!(f, "{}", s)
+    }
+}
+
+/// Configuration information necessary to request a single dataset
+#[derive(
+    Clone,
+    Debug,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+)]
+pub struct DatasetConfig {
+    /// The UUID of the dataset being requested
+    pub id: DatasetUuid,
+
+    /// The dataset's name
+    pub name: DatasetName,
+
+    /// The compression mode to be used by the dataset
+    pub compression: CompressionAlgorithm,
+
+    /// The upper bound on the amount of storage used by this dataset
+    pub quota: Option<usize>,
+
+    /// The lower bound on the amount of storage usable by this dataset
+    pub reservation: Option<usize>,
+}
+
+#[derive(
+    Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash,
+)]
+pub struct DatasetsConfig {
+    /// generation number of this configuration
+    ///
+    /// This generation number is owned by the control plane (i.e., RSS or
+    /// Nexus, depending on whether RSS-to-Nexus handoff has happened). It
+    /// should not be bumped within Sled Agent.
+    ///
+    /// Sled Agent rejects attempts to set the configuration to a generation
+    /// older than the one it's currently running.
+    ///
+    /// Note that "Generation::new()", AKA, the first generation number,
+    /// is reserved for "no datasets". This is the default configuration
+    /// for a sled before any requests have been made.
+    pub generation: Generation,
+
+    pub datasets: BTreeMap<DatasetUuid, DatasetConfig>,
+}
+
+impl Default for DatasetsConfig {
+    fn default() -> Self {
+        Self { generation: Generation::new(), datasets: BTreeMap::new() }
+    }
+}
+
+impl Ledgerable for DatasetsConfig {
+    fn is_newer_than(&self, other: &Self) -> bool {
+        self.generation > other.generation
+    }
+
+    // No need to do this, the generation number is provided externally.
+    fn generation_bump(&mut self) {}
+}
+
+/// Identifies how a single dataset management operation may have succeeded or
+/// failed.
+#[derive(Debug, JsonSchema, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub struct DatasetManagementStatus {
+    pub dataset_name: DatasetName,
+    pub err: Option<String>,
+}
+
+/// The result from attempting to manage datasets.
+#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+#[must_use = "this `DatasetsManagementResult` may contain errors, which should be handled"]
+pub struct DatasetsManagementResult {
+    pub status: Vec<DatasetManagementStatus>,
+}
+
+impl DatasetsManagementResult {
+    pub fn has_error(&self) -> bool {
+        for status in &self.status {
+            if status.err.is_some() {
+                return true;
+            }
+        }
+        false
+    }
+}
+
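To make the `crypt` behavior of `full_name` concrete, a small sketch (the pool UUID is arbitrary):

```rust
use omicron_common::disk::{DatasetKind, DatasetName};
use omicron_common::zpool_name::ZpoolName;

fn main() {
    let pool: ZpoolName =
        "oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap();

    // Encrypted kinds get a "crypt/" component inserted automatically.
    let cockroach = DatasetName::new(pool.clone(), DatasetKind::Cockroach);
    assert_eq!(
        cockroach.full_name(),
        "oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b/crypt/cockroachdb"
    );

    // Crucible encrypts internally, so it lives outside "crypt/".
    let crucible = DatasetName::new(pool, DatasetKind::Crucible);
    assert_eq!(
        crucible.full_name(),
        "oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b/crucible"
    );
}
```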
 /// Uniquely identifies a disk.
 #[derive(
     Debug,
diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs
index 139e6fe607..5d512677f8 100644
--- a/illumos-utils/src/zfs.rs
+++ b/illumos-utils/src/zfs.rs
@@ -6,6 +6,7 @@ use crate::{execute, PFEXEC};
 use camino::{Utf8Path, Utf8PathBuf};
+use omicron_common::disk::CompressionAlgorithm;
 use omicron_common::disk::DiskIdentity;
 use std::fmt;
@@ -203,7 +204,8 @@ pub struct EncryptionDetails {
 #[derive(Debug, Default)]
 pub struct SizeDetails {
     pub quota: Option<usize>,
-    pub compression: Option<&'static str>,
+    pub reservation: Option<usize>,
+    pub compression: CompressionAlgorithm,
 }
 
 #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))]
@@ -259,9 +261,27 @@ impl Zfs {
         Ok(())
     }
 
-    /// Creates a new ZFS filesystem named `name`, unless one already exists.
+    /// Creates a new ZFS filesystem unless one already exists.
     ///
-    /// Applies an optional quota, provided _in bytes_.
+    /// - `name`: the full path to the zfs dataset
+    /// - `mountpoint`: The expected mountpoint of this filesystem.
+    /// If the filesystem already exists, and is not mounted here, an error is
+    /// returned.
+    /// - `zoned`: identifies whether or not this filesystem should be
+    /// used in a zone. Only used when creating a new filesystem - ignored
+    /// if the filesystem already exists.
+    /// - `do_format`: if "false", prevents a new filesystem from being created,
+    /// and returns an error if it is not found.
+    /// - `encryption_details`: Ensures the filesystem is an encryption root.
+    /// For new filesystems, this supplies the key, and all datasets within this
+    /// root are implicitly encrypted. For existing filesystems, ensures that
+    /// they are mounted (and that keys are loaded), but does not verify the
+    /// input details.
+    /// - `size_details`: If supplied, sets size-related information. These
+    /// values are set on both new filesystem creation as well as when loading
+    /// existing filesystems.
+    /// - `additional_options`: Additional ZFS options, which are only set when
+    /// creating new filesystems.
     #[allow(clippy::too_many_arguments)]
     pub fn ensure_filesystem(
         name: &str,
@@ -274,10 +294,18 @@
     ) -> Result<(), EnsureFilesystemError> {
         let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?;
         if exists {
-            if let Some(SizeDetails { quota, compression }) = size_details {
+            if let Some(SizeDetails { quota, reservation, compression }) =
+                size_details
+            {
                 // apply quota and compression mode (in case they've changed across
                 // sled-agent versions since creation)
-                Self::apply_properties(name, &mountpoint, quota, compression)?;
+                Self::apply_properties(
+                    name,
+                    &mountpoint,
+                    quota,
+                    reservation,
+                    compression,
+                )?;
             }
 
             if encryption_details.is_none() {
@@ -351,42 +379,64 @@
             })?;
         }
 
-        if let Some(SizeDetails { quota, compression }) = size_details {
+        if let Some(SizeDetails { quota, reservation, compression }) =
+            size_details
+        {
             // Apply any quota and compression mode.
-            Self::apply_properties(name, &mountpoint, quota, compression)?;
+            Self::apply_properties(
+                name,
+                &mountpoint,
+                quota,
+                reservation,
+                compression,
+            )?;
         }
 
         Ok(())
     }
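The property application that follows now always writes all three values; a standalone sketch of the `None`-to-`"none"` mapping (the helper name is made up):

```rust
// A minimal sketch of the "unset means default" mapping used below:
// an absent quota/reservation becomes the literal "none", which zfs(8)
// interprets as clearing the property, so reruns reset stale values.
use omicron_common::disk::CompressionAlgorithm;

fn property_values(
    quota: Option<usize>,
    reservation: Option<usize>,
    compression: CompressionAlgorithm,
) -> (String, String, String) {
    (
        quota.map(|q| q.to_string()).unwrap_or_else(|| "none".to_string()),
        reservation
            .map(|r| r.to_string())
            .unwrap_or_else(|| "none".to_string()),
        compression.to_string(),
    )
}

fn main() {
    let (quota, reservation, compression) =
        property_values(Some(1 << 30), None, CompressionAlgorithm::Off);
    assert_eq!(
        (quota.as_str(), reservation.as_str(), compression.as_str()),
        ("1073741824", "none", "off")
    );
}
```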
+    /// Applies the following properties to the filesystem.
+    ///
+    /// If any of the options are not supplied, a default "none" or "off"
+    /// value is supplied.
     fn apply_properties(
         name: &str,
         mountpoint: &Mountpoint,
         quota: Option<usize>,
-        compression: Option<&'static str>,
+        reservation: Option<usize>,
+        compression: CompressionAlgorithm,
     ) -> Result<(), EnsureFilesystemError> {
-        if let Some(quota) = quota {
-            if let Err(err) =
-                Self::set_value(name, "quota", &format!("{quota}"))
-            {
-                return Err(EnsureFilesystemError {
-                    name: name.to_string(),
-                    mountpoint: mountpoint.clone(),
-                    // Take the execution error from the SetValueError
-                    err: err.err.into(),
-                });
-            }
+        let quota = quota
+            .map(|q| q.to_string())
+            .unwrap_or_else(|| String::from("none"));
+        let reservation = reservation
+            .map(|r| r.to_string())
+            .unwrap_or_else(|| String::from("none"));
+        let compression = compression.to_string();
+
+        if let Err(err) = Self::set_value(name, "quota", &quota) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
         }
-        if let Some(compression) = compression {
-            if let Err(err) = Self::set_value(name, "compression", compression)
-            {
-                return Err(EnsureFilesystemError {
-                    name: name.to_string(),
-                    mountpoint: mountpoint.clone(),
-                    // Take the execution error from the SetValueError
-                    err: err.err.into(),
-                });
-            }
+        if let Err(err) = Self::set_value(name, "reservation", &reservation) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
+        }
+        if let Err(err) = Self::set_value(name, "compression", &compression) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
         }
         Ok(())
     }
diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs
index a9dee990b9..f896f11c5b 100644
--- a/nexus/db-model/src/dataset.rs
+++ b/nexus/db-model/src/dataset.rs
@@ -8,6 +8,7 @@ use crate::ipv6;
 use crate::schema::{dataset, region};
 use chrono::{DateTime, Utc};
 use db_macros::Asset;
+use omicron_common::api::internal::shared::DatasetKind as ApiDatasetKind;
 use serde::{Deserialize, Serialize};
 use std::net::{Ipv6Addr, SocketAddrV6};
 use uuid::Uuid;
@@ -41,6 +42,7 @@ pub struct Dataset {
     pub kind: DatasetKind,
     pub size_used: Option<i64>,
+    zone_name: Option<String>,
 }
 
 impl Dataset {
@@ -48,12 +50,15 @@ impl Dataset {
         id: Uuid,
         pool_id: Uuid,
         addr: Option<SocketAddrV6>,
-        kind: DatasetKind,
+        api_kind: ApiDatasetKind,
     ) -> Self {
-        let size_used = match kind {
-            DatasetKind::Crucible => Some(0),
-            _ => None,
+        let kind = DatasetKind::from(&api_kind);
+        let (size_used, zone_name) = match api_kind {
+            ApiDatasetKind::Crucible => (Some(0), None),
+            ApiDatasetKind::Zone { name } => (None, Some(name)),
+            _ => (None, None),
         };
+
         Self {
             identity: DatasetIdentity::new(id),
             time_deleted: None,
@@ -63,6 +68,7 @@ impl Dataset {
             port: addr.map(|addr| addr.port().into()),
             kind,
             size_used,
+            zone_name,
         }
     }
diff --git a/nexus/db-model/src/dataset_kind.rs b/nexus/db-model/src/dataset_kind.rs
index 4a86efaca1..40ec76ded3 100644
--- a/nexus/db-model/src/dataset_kind.rs
+++ b/nexus/db-model/src/dataset_kind.rs
@@ -23,10 +23,13 @@ impl_enum_type!(
     ClickhouseServer => b"clickhouse_server"
     ExternalDns => b"external_dns"
     InternalDns => b"internal_dns"
+    ZoneRoot => b"zone_root"
+    Zone => b"zone"
+    Debug => b"debug"
 );
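A minimal sketch of the row shape this implies: the database enum carries no associated data, so the `From` impl below drops the zone name and `Dataset::new` above stores it in its own column (types here are simplified stand-ins, not the nexus-db-model types):

```rust
// Stand-in for the API-level kind; the real type lives in omicron-common.
enum ApiDatasetKind {
    Crucible,
    Zone { name: String },
    Debug,
}

// Maps an API kind onto (kind column, size_used column, zone_name column).
fn to_columns(
    api_kind: ApiDatasetKind,
) -> (&'static str, Option<i64>, Option<String>) {
    match api_kind {
        // Crucible tracks region usage, so it starts with size_used = 0.
        ApiDatasetKind::Crucible => ("crucible", Some(0), None),
        // The zone name moves into its own nullable column.
        ApiDatasetKind::Zone { name } => ("zone", None, Some(name)),
        ApiDatasetKind::Debug => ("debug", None, None),
    }
}

fn main() {
    let (kind, size_used, zone_name) =
        to_columns(ApiDatasetKind::Zone { name: "oxz_ntp".into() });
    assert_eq!(kind, "zone");
    assert_eq!(size_used, None);
    assert_eq!(zone_name.as_deref(), Some("oxz_ntp"));
}
```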
 
-impl From<internal::shared::DatasetKind> for DatasetKind {
-    fn from(k: internal::shared::DatasetKind) -> Self {
+impl From<&internal::shared::DatasetKind> for DatasetKind {
+    fn from(k: &internal::shared::DatasetKind) -> Self {
         match k {
             internal::shared::DatasetKind::Crucible => DatasetKind::Crucible,
             internal::shared::DatasetKind::Cockroach => DatasetKind::Cockroach,
@@ -45,6 +48,13 @@ impl From<internal::shared::DatasetKind> for DatasetKind {
             internal::shared::DatasetKind::InternalDns => {
                 DatasetKind::InternalDns
             }
+            internal::shared::DatasetKind::ZoneRoot => DatasetKind::ZoneRoot,
+            // Enums in the database do not have associated data, so this drops
+            // the "name" of the zone and only considers the type.
+            //
+            // The zone name, if it exists, is stored in a separate column.
+            internal::shared::DatasetKind::Zone { .. } => DatasetKind::Zone,
+            internal::shared::DatasetKind::Debug => DatasetKind::Debug,
         }
     }
 }
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index f01f33c39d..5d9b3da78f 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -1023,6 +1023,7 @@ table! {
         kind -> crate::DatasetKindEnum,
         size_used -> Nullable<Int8>,
+        zone_name -> Nullable<Text>,
     }
 }
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index eaed2990c5..2438f37fba 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(92, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(93, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
     //          |  leaving the first copy as an example for the next person.
     //          v
     // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+    KnownVersion::new(93, "dataset-kinds-zone-and-debug"),
     KnownVersion::new(92, "lldp-link-config-nullable"),
     KnownVersion::new(91, "add-management-gateway-producer-kind"),
     KnownVersion::new(90, "lookup-bgp-config-by-asn"),
diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs
index a08e346fe8..0fe1c7912e 100644
--- a/nexus/db-queries/src/db/datastore/dataset.rs
+++ b/nexus/db-queries/src/db/datastore/dataset.rs
@@ -241,6 +241,7 @@ mod test {
     use nexus_db_model::SledSystemHardware;
     use nexus_db_model::SledUpdate;
     use nexus_test_utils::db::test_setup_database;
+    use omicron_common::api::internal::shared::DatasetKind as ApiDatasetKind;
     use omicron_test_utils::dev;
 
     #[tokio::test]
@@ -291,7 +292,7 @@ mod test {
             Uuid::new_v4(),
             zpool_id,
             Some("[::1]:0".parse().unwrap()),
-            DatasetKind::Crucible,
+            ApiDatasetKind::Crucible,
         ))
         .await
         .expect("failed to insert dataset")
@@ -324,7 +325,7 @@ mod test {
             dataset1.id(),
             zpool_id,
             Some("[::1]:12345".parse().unwrap()),
-            DatasetKind::Cockroach,
+            ApiDatasetKind::Cockroach,
         ))
         .await
         .expect("failed to do-nothing insert dataset");
@@ -340,7 +341,7 @@ mod test {
             Uuid::new_v4(),
             zpool_id,
             Some("[::1]:0".parse().unwrap()),
-            DatasetKind::Cockroach,
+            ApiDatasetKind::Cockroach,
         ))
         .await
         .expect("failed to upsert dataset");
@@ -372,7 +373,7 @@ mod test {
             dataset1.id(),
             zpool_id,
             Some("[::1]:12345".parse().unwrap()),
-            DatasetKind::Cockroach,
+            ApiDatasetKind::Cockroach,
         ))
         .await
         .expect("failed to do-nothing insert dataset");
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 9e69800fed..5b1163dc8b 100644
--- 
a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -431,10 +431,10 @@ mod test { use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::{ - BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, - PhysicalDisk, PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, - Project, Rack, Region, SiloUser, SledBaseboard, SledSystemHardware, - SledUpdate, SshKey, Zpool, + BlockSize, ConsoleSession, Dataset, ExternalIp, PhysicalDisk, + PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, Project, Rack, + Region, SiloUser, SledBaseboard, SledSystemHardware, SledUpdate, + SshKey, Zpool, }; use crate::db::queries::vpc_subnet::InsertVpcSubnetQuery; use chrono::{Duration, Utc}; @@ -450,6 +450,7 @@ mod test { use omicron_common::api::external::{ ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; + use omicron_common::api::internal::shared::DatasetKind; use omicron_test_utils::dev; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 6331770ef5..4e7dde244b 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -270,7 +270,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -284,6 +285,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -310,6 +312,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index e713121d34..b2c164a6d9 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -268,7 +268,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -282,6 +283,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -308,6 +310,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql index 0b8dc4fca6..97ee23f82e 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql @@ -281,7 +281,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -295,6 +296,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -321,6 +323,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, 
inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql index 9ac945f71d..a1cc103594 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql @@ -279,7 +279,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -293,6 +294,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -319,6 +321,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 6444934ba6..2f84378a13 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -67,7 +67,7 @@ pub(crate) async fn ensure_dataset_records_exist( id.into_untyped_uuid(), pool_id.into_untyped_uuid(), Some(address), - kind.into(), + kind.clone(), ); let maybe_inserted = datastore .dataset_insert_if_not_exists(dataset) diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index af95eb8e77..d94bbe2e27 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -135,7 +135,6 @@ mod test { use httptest::responders::status_code; use httptest::Expectation; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; @@ -153,6 +152,7 @@ mod test { use nexus_types::identity::Asset; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; + use omicron_common::api::internal::shared::DatasetKind; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs index 602f3f85e8..6e49ddc7f0 100644 --- a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ -179,13 +179,13 @@ mod tests { use diesel::ExpressionMethods; use diesel::QueryDsl; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::Region; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; + use omicron_common::api::internal::shared::DatasetKind; use omicron_uuid_kinds::{ DatasetUuid, PhysicalDiskUuid, RegionUuid, SledUuid, }; diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index f3c0031327..835541c2ea 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -147,7 +147,7 @@ impl super::Nexus { dataset.dataset_id, dataset.zpool_id, Some(dataset.request.address), - dataset.request.kind.into(), + dataset.request.kind, ) }) .collect(); diff --git 
a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index 86aab2ac22..1bc1491468 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -747,7 +747,6 @@ pub(crate) mod test { }; use chrono::Utc; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::Region; use nexus_db_model::RegionReplacement; use nexus_db_model::RegionReplacementState; @@ -758,6 +757,7 @@ pub(crate) mod test { use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils_macros::nexus_test; use nexus_types::identity::Asset; + use omicron_common::api::internal::shared::DatasetKind; use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 261045670e..9c21ca73a1 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -12,7 +12,6 @@ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; -use nexus_db_queries::db::model::DatasetKind; use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::deployment::DiskFilter; use nexus_types::deployment::SledFilter; @@ -23,6 +22,7 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; +use omicron_common::api::internal::shared::DatasetKind; use omicron_uuid_kinds::{GenericUuid, SledUuid}; use sled_agent_client::Client as SledAgentClient; use std::net::SocketAddrV6; @@ -292,13 +292,12 @@ impl super::Nexus { // Datasets (contained within zpools) - /// Upserts a dataset into the database, updating it if it already exists. - pub(crate) async fn upsert_dataset( + /// Upserts a crucible dataset into the database, updating it if it already exists. 
+ pub(crate) async fn upsert_crucible_dataset( &self, id: Uuid, zpool_id: Uuid, address: SocketAddrV6, - kind: DatasetKind, ) -> Result<(), Error> { info!( self.log, @@ -307,6 +306,7 @@ impl super::Nexus { "dataset_id" => id.to_string(), "address" => address.to_string() ); + let kind = DatasetKind::Crucible; let dataset = db::model::Dataset::new(id, zpool_id, Some(address), kind); self.db_datastore.dataset_upsert(dataset).await?; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index d5c853b15b..284e8de2ea 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -384,12 +384,7 @@ impl nexus_test_interface::NexusServer for Server { self.apictx .context .nexus - .upsert_dataset( - dataset_id, - zpool_id, - address, - nexus_db_queries::db::model::DatasetKind::Crucible, - ) + .upsert_crucible_dataset(dataset_id, zpool_id, address) .await .unwrap(); } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 619a2187b5..da8bbacf8b 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2711,39 +2711,8 @@ ] }, "DatasetKind": { - "description": "Describes the purpose of the dataset.", - "oneOf": [ - { - "type": "string", - "enum": [ - "crucible", - "cockroach", - "external_dns", - "internal_dns" - ] - }, - { - "description": "Used for single-node clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse" - ] - }, - { - "description": "Used for replicated clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse_keeper" - ] - }, - { - "description": "Used for replicated clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse_server" - ] - } - ] + "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" }, "DatasetPutRequest": { "description": "Describes a dataset within a pool.", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ec2a8bfc4d..bb8e4e0b87 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -176,6 +176,62 @@ } } }, + "/datasets": { + "get": { + "summary": "Lists the datasets that this sled is configured to use", + "operationId": "datasets_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DatasetsConfig" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Configures datasets to be used on this sled", + "operationId": "datasets_put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DatasetsConfig" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DatasetsManagementResult" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/disks/{disk_id}": { "put": { "operationId": "disk_put", @@ -2005,6 +2061,112 @@ } ] }, + "CompressionAlgorithm": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "on" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "off" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + 
"enum": [ + "gzip" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "level": { + "$ref": "#/components/schemas/GzipLevel" + }, + "type": { + "type": "string", + "enum": [ + "gzip_n" + ] + } + }, + "required": [ + "level", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lz4" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lzjb" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "zle" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, "CrucibleOpts": { "description": "CrucibleOpts\n\n
<details><summary>JSON schema</summary>\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"lossy\", \"read_only\", \"target\" ], \"properties\": { \"cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"control\": { \"type\": [ \"string\", \"null\" ] }, \"flush_timeout\": { \"type\": [ \"number\", \"null\" ], \"format\": \"float\" }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"key\": { \"type\": [ \"string\", \"null\" ] }, \"key_pem\": { \"type\": [ \"string\", \"null\" ] }, \"lossy\": { \"type\": \"boolean\" }, \"read_only\": { \"type\": \"boolean\" }, \"root_cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"target\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } } } } ```\n</details>
", "type": "object", @@ -2058,6 +2220,128 @@ "target" ] }, + "DatasetConfig": { + "description": "Configuration information necessary to request a single dataset", + "type": "object", + "properties": { + "compression": { + "description": "The compression mode to be used by the dataset", + "allOf": [ + { + "$ref": "#/components/schemas/CompressionAlgorithm" + } + ] + }, + "id": { + "description": "The UUID of the dataset being requested", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } + ] + }, + "name": { + "description": "The dataset's name", + "allOf": [ + { + "$ref": "#/components/schemas/DatasetName" + } + ] + }, + "quota": { + "nullable": true, + "description": "The upper bound on the amount of storage used by this dataset", + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "reservation": { + "nullable": true, + "description": "The lower bound on the amount of storage usable by this dataset", + "type": "integer", + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "compression", + "id", + "name" + ] + }, + "DatasetKind": { + "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" + }, + "DatasetManagementStatus": { + "description": "Identifies how a single dataset management operation may have succeeded or failed.", + "type": "object", + "properties": { + "dataset_name": { + "$ref": "#/components/schemas/DatasetName" + }, + "err": { + "nullable": true, + "type": "string" + } + }, + "required": [ + "dataset_name" + ] + }, + "DatasetName": { + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/DatasetKind" + }, + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + "kind", + "pool_name" + ] + }, + "DatasetsConfig": { + "type": "object", + "properties": { + "datasets": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/DatasetConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". This is the default configuration for a sled before any requests have been made.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + } + }, + "required": [ + "datasets", + "generation" + ] + }, + "DatasetsManagementResult": { + "description": "The result from attempting to manage datasets.", + "type": "object", + "properties": { + "status": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetManagementStatus" + } + } + }, + "required": [ + "status" + ] + }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -2700,6 +2984,11 @@ "format": "uint64", "minimum": 0 }, + "GzipLevel": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "HostIdentifier": { "description": "A `HostIdentifier` represents either an IP host or network (v4 or v6), or an entire VPC (identified by its VNI). 
It is used in firewall rule host filters.", "oneOf": [ @@ -4762,6 +5051,10 @@ "sync" ] }, + "TypedUuidForDatasetKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForInstanceKind": { "type": "string", "format": "uuid" diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up01.sql b/schema/crdb/dataset-kinds-zone-and-debug/up01.sql new file mode 100644 index 0000000000..1cfe718d00 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up01.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'zone_root' AFTER 'internal_dns'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up02.sql b/schema/crdb/dataset-kinds-zone-and-debug/up02.sql new file mode 100644 index 0000000000..93178e3685 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up02.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'zone' AFTER 'zone_root'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up03.sql b/schema/crdb/dataset-kinds-zone-and-debug/up03.sql new file mode 100644 index 0000000000..58d215d177 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up03.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'debug' AFTER 'zone'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up04.sql b/schema/crdb/dataset-kinds-zone-and-debug/up04.sql new file mode 100644 index 0000000000..b92bce1b6c --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up04.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.dataset ADD COLUMN IF NOT EXISTS zone_name TEXT; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up05.sql b/schema/crdb/dataset-kinds-zone-and-debug/up05.sql new file mode 100644 index 0000000000..3f33b79c72 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up05.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.dataset ADD CONSTRAINT IF NOT EXISTS zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) +) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index d531672832..e851d2ed6b 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -509,7 +509,10 @@ CREATE TYPE IF NOT EXISTS omicron.public.dataset_kind AS ENUM ( 'clickhouse_keeper', 'clickhouse_server', 'external_dns', - 'internal_dns' + 'internal_dns', + 'zone_root', + 'zone', + 'debug' ); /* @@ -535,6 +538,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( /* An upper bound on the amount of space that might be in-use */ size_used INT, + /* Only valid if kind = zone -- the name of this zone */ + zone_name TEXT, + /* Crucible must make use of 'size_used'; other datasets manage their own storage */ CONSTRAINT size_used_column_set_for_crucible CHECK ( (kind != 'crucible') OR @@ -544,6 +550,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( CONSTRAINT ip_and_port_set_for_crucible CHECK ( (kind != 'crucible') OR (kind = 'crucible' AND ip IS NOT NULL and port IS NOT NULL) + ), + + CONSTRAINT zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) ) ); @@ -4214,7 +4225,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '92.0.0', NULL) + (TRUE, NOW(), NOW(), '93.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/omicron-datasets.json b/schema/omicron-datasets.json new file mode 100644 index 0000000000..07fc2cfb13 --- /dev/null +++ b/schema/omicron-datasets.json @@ -0,0 +1,226 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": 
"DatasetsConfig", + "type": "object", + "required": [ + "datasets", + "generation" + ], + "properties": { + "datasets": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/DatasetConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". This is the default configuration for a sled before any requests have been made.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + }, + "definitions": { + "CompressionAlgorithm": { + "oneOf": [ + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "on" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "off" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "gzip" + ] + } + } + }, + { + "type": "object", + "required": [ + "level", + "type" + ], + "properties": { + "level": { + "$ref": "#/definitions/GzipLevel" + }, + "type": { + "type": "string", + "enum": [ + "gzip_n" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "lz4" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "lzjb" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "zle" + ] + } + } + } + ] + }, + "DatasetConfig": { + "description": "Configuration information necessary to request a single dataset", + "type": "object", + "required": [ + "compression", + "id", + "name" + ], + "properties": { + "compression": { + "description": "The compression mode to be used by the dataset", + "allOf": [ + { + "$ref": "#/definitions/CompressionAlgorithm" + } + ] + }, + "id": { + "description": "The UUID of the dataset being requested", + "allOf": [ + { + "$ref": "#/definitions/TypedUuidForDatasetKind" + } + ] + }, + "name": { + "description": "The dataset's name", + "allOf": [ + { + "$ref": "#/definitions/DatasetName" + } + ] + }, + "quota": { + "description": "The upper bound on the amount of storage used by this dataset", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + }, + "reservation": { + "description": "The lower bound on the amount of storage usable by this dataset", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + } + } + }, + "DatasetKind": { + "description": "The kind of dataset. 
See the `DatasetKind` enum in omicron-common for possible values.",
+      "type": "string"
+    },
+    "DatasetName": {
+      "type": "object",
+      "required": [
+        "kind",
+        "pool_name"
+      ],
+      "properties": {
+        "kind": {
+          "$ref": "#/definitions/DatasetKind"
+        },
+        "pool_name": {
+          "$ref": "#/definitions/ZpoolName"
+        }
+      }
+    },
+    "Generation": {
+      "description": "Generation numbers stored in the database, used for optimistic concurrency control",
+      "type": "integer",
+      "format": "uint64",
+      "minimum": 0.0
+    },
+    "GzipLevel": {
+      "type": "integer",
+      "format": "uint8",
+      "minimum": 0.0
+    },
+    "TypedUuidForDatasetKind": {
+      "type": "string",
+      "format": "uuid"
+    },
+    "ZpoolName": {
+      "title": "The name of a Zpool",
+      "description": "Zpool names are of the format ox{i,p}_<UUID>. They are either Internal or External, and should be unique",
+      "type": "string",
+      "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$"
+    }
+  }
+}
\ No newline at end of file
diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs
index 410747bf46..d9e49a5c56 100644
--- a/sled-agent/api/src/lib.rs
+++ b/sled-agent/api/src/lib.rs
@@ -21,7 +21,10 @@ use omicron_common::{
             SwitchPorts, VirtualNetworkInterfaceHost,
         },
     },
-    disk::{DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig},
+    disk::{
+        DatasetsConfig, DatasetsManagementResult, DiskVariant,
+        DisksManagementResult, OmicronPhysicalDisksConfig,
+    },
 };
 use omicron_uuid_kinds::{PropolisUuid, ZpoolUuid};
 use schemars::JsonSchema;
@@ -168,6 +171,25 @@ pub trait SledAgentApi {
         body: TypedBody,
     ) -> Result;
 
+    /// Configures datasets to be used on this sled
+    #[endpoint {
+        method = PUT,
+        path = "/datasets",
+    }]
+    async fn datasets_put(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<DatasetsConfig>,
+    ) -> Result<HttpResponseOk<DatasetsManagementResult>, HttpError>;
+
+    /// Lists the datasets that this sled is configured to use
+    #[endpoint {
+        method = GET,
+        path = "/datasets",
+    }]
+    async fn datasets_get(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<DatasetsConfig>, HttpError>;
+
     #[endpoint {
         method = GET,
         path = "/omicron-physical-disks",
diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs
index 2e9ea4c8d9..a0f7826db3 100644
--- a/sled-agent/src/backing_fs.rs
+++ b/sled-agent/src/backing_fs.rs
@@ -25,6 +25,7 @@ use camino::Utf8PathBuf;
 use illumos_utils::zfs::{
     EnsureFilesystemError, GetValueError, Mountpoint, SizeDetails, Zfs,
 };
+use omicron_common::disk::CompressionAlgorithm;
 use std::io;
 
 #[derive(Debug, thiserror::Error)]
@@ -50,7 +51,7 @@ struct BackingFs<'a> {
     // Optional quota, in _bytes_
     quota: Option<usize>,
     // Optional compression mode
-    compression: Option<&'static str>,
+    compression: CompressionAlgorithm,
     // Linked service
     service: Option<&'static str>,
     // Subdirectories to ensure
@@ -63,7 +64,7 @@ impl<'a> BackingFs<'a> {
             name,
             mountpoint: "legacy",
             quota: None,
-            compression: None,
+            compression: CompressionAlgorithm::Off,
             service: None,
             subdirs: None,
         }
@@ -79,8 +80,8 @@ impl<'a> BackingFs<'a> {
         self
     }
 
-    const fn compression(mut self, compression: &'static str) -> Self {
-        self.compression = Some(compression);
+    const fn compression(mut self, compression: CompressionAlgorithm) -> Self {
+        self.compression = compression;
         self
     }
 
@@ -101,7 +102,7 @@
 const BACKING_FMD_SUBDIRS: [&'static str; 3] = ["rsrc", "ckpt", "xprt"];
 const BACKING_FMD_SERVICE: &'static str = "svc:/system/fmd:default";
 const BACKING_FMD_QUOTA: usize = 500 * (1 << 20); // 500 MiB
 
-const BACKING_COMPRESSION: &'static str = "on";
+const BACKING_COMPRESSION: CompressionAlgorithm = CompressionAlgorithm::On;
 const BACKINGFS_COUNT: usize = 1;
 static BACKINGFS: [BackingFs; BACKINGFS_COUNT] =
@@ -137,6 +138,7 @@ pub(crate) fn ensure_backing_fs(
         let size_details = Some(SizeDetails {
             quota: bfs.quota,
+            reservation: None,
             compression: bfs.compression,
         });
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs
index 221224a2e9..1d61d97675 100644
--- a/sled-agent/src/http_entrypoints.rs
+++ b/sled-agent/src/http_entrypoints.rs
@@ -28,7 +28,8 @@ use omicron_common::api::internal::shared::{
     VirtualNetworkInterfaceHost,
 };
 use omicron_common::disk::{
-    DiskVariant, DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig,
+    DatasetsConfig, DatasetsManagementResult, DiskVariant,
+    DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig,
 };
 use sled_agent_api::*;
 use sled_agent_types::boot_disk::{
@@ -219,6 +220,23 @@ impl SledAgentApi for SledAgentImpl {
             .map_err(HttpError::from)
     }
 
+    async fn datasets_put(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<DatasetsConfig>,
+    ) -> Result<HttpResponseOk<DatasetsManagementResult>, HttpError> {
+        let sa = rqctx.context();
+        let body_args = body.into_inner();
+        let result = sa.datasets_ensure(body_args).await?;
+        Ok(HttpResponseOk(result))
+    }
+
+    async fn datasets_get(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<DatasetsConfig>, HttpError> {
+        let sa = rqctx.context();
+        Ok(HttpResponseOk(sa.datasets_config_list().await?))
+    }
+
     async fn zone_bundle_cleanup(
         rqctx: RequestContext<Self::Context>,
     ) -> Result<HttpResponseOk<BTreeMap<Utf8PathBuf, CleanupCount>>, HttpError>
diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index 419e897d75..de0b086752 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -3,9 +3,8 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType};
+use omicron_common::disk::{DatasetKind, DatasetName};
 pub use sled_hardware::DendriteAsic;
-use sled_storage::dataset::DatasetName;
-use sled_storage::dataset::DatasetType;
 use std::net::SocketAddrV6;
 
 /// Extension trait for `OmicronZoneConfig`.
@@ -49,25 +48,25 @@ pub(crate) trait OmicronZoneTypeExt {
             | OmicronZoneType::Oximeter { .. }
             | OmicronZoneType::CruciblePantry { .. } => None,
             OmicronZoneType::Clickhouse { dataset, address, .. } => {
-                Some((dataset, DatasetType::Clickhouse, address))
+                Some((dataset, DatasetKind::Clickhouse, address))
             }
             OmicronZoneType::ClickhouseKeeper { dataset, address, .. } => {
-                Some((dataset, DatasetType::ClickhouseKeeper, address))
+                Some((dataset, DatasetKind::ClickhouseKeeper, address))
             }
             OmicronZoneType::ClickhouseServer { dataset, address, .. } => {
-                Some((dataset, DatasetType::ClickhouseServer, address))
+                Some((dataset, DatasetKind::ClickhouseServer, address))
            }
             OmicronZoneType::CockroachDb { dataset, address, .. } => {
-                Some((dataset, DatasetType::CockroachDb, address))
+                Some((dataset, DatasetKind::Cockroach, address))
             }
             OmicronZoneType::Crucible { dataset, address, .. } => {
-                Some((dataset, DatasetType::Crucible, address))
+                Some((dataset, DatasetKind::Crucible, address))
             }
             OmicronZoneType::ExternalDns { dataset, http_address, .. } => {
-                Some((dataset, DatasetType::ExternalDns, http_address))
+                Some((dataset, DatasetKind::ExternalDns, http_address))
             }
             OmicronZoneType::InternalDns { dataset, http_address, .. } => {
-                Some((dataset, DatasetType::InternalDns, http_address))
+                Some((dataset, DatasetKind::InternalDns, http_address))
             }
         }?;
diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs
index e0959b0219..7bf3a7a875 100644
--- a/sled-agent/src/rack_setup/plan/service.rs
+++ b/sled-agent/src/rack_setup/plan/service.rs
@@ -32,7 +32,8 @@ use omicron_common::backoff::{
     retry_notify_ext, retry_policy_internal_service_aggressive, BackoffError,
 };
 use omicron_common::disk::{
-    DiskVariant, OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig,
+    DatasetKind, DatasetName, DiskVariant, OmicronPhysicalDiskConfig,
+    OmicronPhysicalDisksConfig,
 };
 use omicron_common::ledger::{self, Ledger, Ledgerable};
 use omicron_common::policy::{
@@ -50,7 +51,7 @@ use sled_agent_client::{
 };
 use sled_agent_types::rack_init::RackInitializeRequest as Config;
 use sled_agent_types::sled::StartSledAgentRequest;
-use sled_storage::dataset::{DatasetName, DatasetType, CONFIG_DATASET};
+use sled_storage::dataset::CONFIG_DATASET;
 use sled_storage::manager::StorageHandle;
 use slog::Logger;
 use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
@@ -497,7 +498,7 @@ impl Plan {
             )
             .unwrap();
             let dataset_name =
-                sled.alloc_dataset_from_u2s(DatasetType::InternalDns)?;
+                sled.alloc_dataset_from_u2s(DatasetKind::InternalDns)?;
             let filesystem_pool = Some(dataset_name.pool().clone());
 
             sled.request.zones.push(BlueprintZoneConfig {
@@ -539,7 +540,7 @@ impl Plan {
             )
             .unwrap();
             let dataset_name =
-                sled.alloc_dataset_from_u2s(DatasetType::CockroachDb)?;
+                sled.alloc_dataset_from_u2s(DatasetKind::Cockroach)?;
             let filesystem_pool = Some(dataset_name.pool().clone());
             sled.request.zones.push(BlueprintZoneConfig {
                 disposition: BlueprintZoneDisposition::InService,
@@ -587,7 +588,7 @@ impl Plan {
             let dns_address = from_sockaddr_to_external_floating_addr(
                 SocketAddr::new(external_ip, dns_port),
             );
-            let dataset_kind = DatasetType::ExternalDns;
+            let dataset_kind = DatasetKind::ExternalDns;
             let dataset_name = sled.alloc_dataset_from_u2s(dataset_kind)?;
             let filesystem_pool = Some(dataset_name.pool().clone());
 
@@ -716,7 +717,7 @@ impl Plan {
             )
             .unwrap();
             let dataset_name =
-                sled.alloc_dataset_from_u2s(DatasetType::Clickhouse)?;
+                sled.alloc_dataset_from_u2s(DatasetKind::Clickhouse)?;
             let filesystem_pool = Some(dataset_name.pool().clone());
             sled.request.zones.push(BlueprintZoneConfig {
                 disposition: BlueprintZoneDisposition::InService,
@@ -759,7 +760,7 @@ impl Plan {
             )
             .unwrap();
             let dataset_name =
-                sled.alloc_dataset_from_u2s(DatasetType::ClickhouseServer)?;
+                sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseServer)?;
             let filesystem_pool = Some(dataset_name.pool().clone());
             sled.request.zones.push(BlueprintZoneConfig {
                 disposition: BlueprintZoneDisposition::InService,
@@ -800,7 +801,7 @@ impl Plan {
             )
             .unwrap();
             let dataset_name =
-                sled.alloc_dataset_from_u2s(DatasetType::ClickhouseKeeper)?;
+                sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseKeeper)?;
             let filesystem_pool = Some(dataset_name.pool().clone());
             sled.request.zones.push(BlueprintZoneConfig {
                 disposition: BlueprintZoneDisposition::InService,
@@ -1034,7 +1035,7 @@ pub struct SledInfo {
     u2_zpools: Vec<ZpoolName>,
     /// spreads components across a Sled's zpools
     u2_zpool_allocators:
-        HashMap<DatasetType, Box<dyn Iterator<Item = usize> + Send + Sync>>,
+        HashMap<DatasetKind, Box<dyn Iterator<Item = usize> + Send + Sync>>,
     /// whether this Sled is a scrimlet
     is_scrimlet: bool,
     /// allocator for addresses in this Sled's subnet
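A toy sketch (not the RSS code) of the per-kind rotation that `u2_zpool_allocators` enables: each dataset kind cycles through the sled's U.2 zpools independently, so consecutive datasets of one kind land on different pools. Pool names here are made up:

```rust
use std::collections::HashMap;

fn main() {
    let zpools = ["oxp_a", "oxp_b", "oxp_c"];
    // One cycling iterator per dataset kind, created lazily on first use.
    let mut allocators: HashMap<&str, Box<dyn Iterator<Item = usize>>> =
        HashMap::new();

    let mut alloc = |kind: &'static str| -> &'static str {
        let iter = allocators
            .entry(kind)
            .or_insert_with(|| Box::new((0..zpools.len()).cycle()));
        zpools[iter.next().unwrap()]
    };

    assert_eq!(alloc("internal_dns"), "oxp_a");
    assert_eq!(alloc("internal_dns"), "oxp_b");
    // A different kind starts its own rotation from the beginning.
    assert_eq!(alloc("cockroachdb"), "oxp_a");
}
```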
kind: DatasetKind, ) -> Result<DatasetName, PlanError> { // We have two goals here: // diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index d6909174ee..7677dfbd8a 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -90,6 +90,7 @@ use omicron_common::api::internal::shared::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; +use omicron_common::disk::{DatasetKind, DatasetName}; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; use once_cell::sync::OnceCell; @@ -103,9 +104,7 @@ use sled_hardware::underlay; use sled_hardware::SledMode; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; -use sled_storage::dataset::{ - DatasetName, DatasetType, CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET, -}; +use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET}; use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; @@ -1881,7 +1880,7 @@ impl ServiceManager { let dataset_name = DatasetName::new( dataset.pool_name.clone(), - DatasetType::Crucible, + DatasetKind::Crucible, ) .full_name(); let uuid = &Uuid::new_v4().to_string(); diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index aead47658f..ac583a1a74 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -30,6 +30,8 @@ use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; +use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use sled_agent_api::*; @@ -299,6 +301,23 @@ impl SledAgentApi for SledAgentSimImpl { )) } + async fn datasets_put( + rqctx: RequestContext<Self::Context>, + body: TypedBody<DatasetsConfig>, + ) -> Result<HttpResponseOk<DatasetsManagementResult>, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.datasets_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } + + async fn datasets_get( + rqctx: RequestContext<Self::Context>, + ) -> Result<HttpResponseOk<DatasetsConfig>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.datasets_config_list().await?)) + } + async fn omicron_physical_disks_put( rqctx: RequestContext<Self::Context>, body: TypedBody<OmicronPhysicalDisksConfig>, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 7292b3dee1..aaac7f63d0 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -35,8 +35,8 @@ use omicron_common::api::internal::shared::{ VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DiskIdentity, DiskVariant, DisksManagementResult, - OmicronPhysicalDisksConfig, + DatasetsConfig, DatasetsManagementResult, DiskIdentity, DiskVariant, + DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use oxnet::Ipv6Net; @@ -868,6 +868,19 @@ impl SledAgent { }) } + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, HttpError> { + self.storage.lock().await.datasets_ensure(config).await + } + + pub async fn datasets_config_list( + &self, + ) -> Result<DatasetsConfig, HttpError> { + self.storage.lock().await.datasets_config_list().await + } + pub async fn omicron_physical_disks_list( &self, ) -> Result<OmicronPhysicalDisksConfig, HttpError> { diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index
ac8f80069b..144fb48aa9 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -18,6 +18,9 @@ use crucible_agent_client::types::{ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; +use omicron_common::disk::DatasetManagementStatus; +use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskManagementStatus; use omicron_common::disk::DiskVariant; @@ -555,6 +558,7 @@ pub struct Storage { sled_id: Uuid, log: Logger, config: Option<OmicronPhysicalDisksConfig>, + dataset_config: Option<DatasetsConfig>, physical_disks: HashMap<DiskIdentity, PhysicalDisk>, next_disk_slot: i64, zpools: HashMap<ZpoolUuid, Zpool>, @@ -568,6 +572,7 @@ impl Storage { sled_id, log, config: None, + dataset_config: None, physical_disks: HashMap::new(), next_disk_slot: 0, zpools: HashMap::new(), @@ -581,6 +586,45 @@ impl Storage { &self.physical_disks } + pub async fn datasets_config_list( + &self, + ) -> Result<DatasetsConfig, HttpError> { + let Some(config) = self.dataset_config.as_ref() else { + return Err(HttpError::for_not_found( + None, + "No control plane datasets".into(), + )); + }; + Ok(config.clone()) + } + + pub async fn datasets_ensure( + &mut self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, HttpError> { + if let Some(stored_config) = self.dataset_config.as_ref() { + if config.generation < stored_config.generation { + return Err(HttpError::for_client_error( + None, + http::StatusCode::BAD_REQUEST, + "Generation number too old".to_string(), + )); + } + } + self.dataset_config.replace(config.clone()); + + Ok(DatasetsManagementResult { + status: config + .datasets + .values() + .map(|config| DatasetManagementStatus { + dataset_name: config.name.clone(), + err: None, + }) + .collect(), + }) + } + pub async fn omicron_physical_disks_list( &mut self, ) -> Result<OmicronPhysicalDisksConfig, HttpError> { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d69ccedb7d..f13d8caccf 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -51,7 +51,10 @@ use omicron_common::api::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; -use omicron_common::disk::{DisksManagementResult, OmicronPhysicalDisksConfig}; +use omicron_common::disk::{ + DatasetsConfig, DatasetsManagementResult, DisksManagementResult, + OmicronPhysicalDisksConfig, +}; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; use sled_agent_api::Zpool; @@ -808,6 +811,29 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } + pub async fn datasets_config_list(&self) -> Result<DatasetsConfig, Error> { + Ok(self.storage().datasets_config_list().await?) + } + + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, Error> { + info!(self.log, "datasets ensure"); + let datasets_result = self.storage().datasets_ensure(config).await?; + info!(self.log, "datasets ensure: Updated storage"); + + // TODO(https://github.com/oxidecomputer/omicron/issues/6177): + // At the moment, we don't actually remove any datasets -- this function + // just adds new datasets. + // + // Once we start removing old datasets, we should probably ensure that + // they are no longer in-use before returning (similar to + // omicron_physical_disks_ensure). + + Ok(datasets_result) + } + /// Requests the set of physical disks currently managed by the Sled Agent.
/// /// This should be contrasted by the set of disks in the inventory, which @@ -896,7 +922,7 @@ impl SledAgent { &self, requested_zones: OmicronZonesConfig, ) -> Result<(), Error> { - // TODO: + // TODO(https://github.com/oxidecomputer/omicron/issues/6043): // - If these are the set of filesystems, we should also consider // removing the ones which are not listed here. // - It's probably worth sending a bulk request to the storage system, diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 74f2be782f..e2b024db11 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -15,10 +15,10 @@ use illumos_utils::zfs::{ use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::api::internal::shared::DatasetKind; -use omicron_common::disk::{DiskIdentity, DiskVariant}; +use omicron_common::disk::{ + CompressionAlgorithm, DatasetName, DiskIdentity, DiskVariant, GzipLevel, +}; use rand::distributions::{Alphanumeric, DistString}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use slog::{debug, info, Logger}; use std::process::Stdio; use std::str::FromStr; @@ -45,7 +45,8 @@ cfg_if! { // tuned as needed. pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); // passed to zfs create -o compression= -pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; +pub const DUMP_DATASET_COMPRESSION: CompressionAlgorithm = + CompressionAlgorithm::GzipN { level: GzipLevel::new::<9>() }; // U.2 datasets live under the encrypted dataset and inherit encryption pub const ZONE_DATASET: &'static str = "crypt/zone"; @@ -102,12 +103,17 @@ struct ExpectedDataset { // Identifies if the dataset should be deleted on boot wipe: bool, // Optional compression mode - compression: Option<&'static str>, + compression: CompressionAlgorithm, } impl ExpectedDataset { const fn new(name: &'static str) -> Self { - ExpectedDataset { name, quota: None, wipe: false, compression: None } + ExpectedDataset { + name, + quota: None, + wipe: false, + compression: CompressionAlgorithm::Off, + } } const fn quota(mut self, quota: usize) -> Self { @@ -120,151 +126,12 @@ impl ExpectedDataset { self } - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); + const fn compression(mut self, compression: CompressionAlgorithm) -> Self { + self.compression = compression; self } } -/// The type of a dataset, and an auxiliary information necessary to -/// successfully launch a zone managing the associated data. -/// -/// There is currently no auxiliary data here, but there's a separation from -/// omicron-common's `DatasetKind` in case there might be some in the future. -#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum DatasetType { - // TODO: `DatasetKind` uses `Cockroach`, not `CockroachDb`, for historical - // reasons. It may be worth using the same name for both. - CockroachDb, - Crucible, - Clickhouse, - ClickhouseKeeper, - ClickhouseServer, - ExternalDns, - InternalDns, -} - -impl DatasetType { - pub fn dataset_should_be_encrypted(&self) -> bool { - match self { - // We encrypt all datasets except Crucible. - // - // Crucible already performs encryption internally, and we - // avoid double-encryption. 
- DatasetType::Crucible => false, - _ => true, - } - } - - pub fn kind(&self) -> DatasetKind { - match self { - Self::Crucible => DatasetKind::Crucible, - Self::CockroachDb => DatasetKind::Cockroach, - Self::Clickhouse => DatasetKind::Clickhouse, - Self::ClickhouseKeeper => DatasetKind::ClickhouseKeeper, - Self::ClickhouseServer => DatasetKind::ClickhouseServer, - Self::ExternalDns => DatasetKind::ExternalDns, - Self::InternalDns => DatasetKind::InternalDns, - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum DatasetKindParseError { - #[error("Dataset unknown: {0}")] - UnknownDataset(String), -} - -impl FromStr for DatasetType { - type Err = DatasetKindParseError; - - fn from_str(s: &str) -> Result<Self, Self::Err> { - use DatasetType::*; - let kind = match s { - "crucible" => Crucible, - "cockroachdb" => CockroachDb, - "clickhouse" => Clickhouse, - "clickhouse_keeper" => ClickhouseKeeper, - "external_dns" => ExternalDns, - "internal_dns" => InternalDns, - _ => { - return Err(DatasetKindParseError::UnknownDataset( - s.to_string(), - )) - } - }; - Ok(kind) - } -} - -impl std::fmt::Display for DatasetType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DatasetType::*; - let s = match self { - Crucible => "crucible", - CockroachDb => "cockroachdb", - Clickhouse => "clickhouse", - ClickhouseKeeper => "clickhouse_keeper", - ClickhouseServer => "clickhouse_server", - ExternalDns => "external_dns", - InternalDns => "internal_dns", - }; - write!(f, "{}", s) - } -} - -#[derive( - Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, -)] -pub struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. - pool_name: ZpoolName, - // A name for the dataset within the Zpool. - kind: DatasetType, -} - -impl DatasetName { - pub fn new(pool_name: ZpoolName, kind: DatasetType) -> Self { - Self { pool_name, kind } - } - - pub fn pool(&self) -> &ZpoolName { - &self.pool_name - } - - pub fn dataset(&self) -> &DatasetType { - &self.kind - } - - /// Returns the full name of the dataset, as would be returned from - /// "zfs get" or "zfs list". - /// - /// If this dataset should be encrypted, this automatically adds the - /// "crypt" dataset component. - pub fn full_name(&self) -> String { - // Currently, we encrypt all datasets except Crucible. - // - // Crucible already performs encryption internally, and we - // avoid double-encryption.
- if self.kind.dataset_should_be_encrypted() { - self.full_encrypted_name() - } else { - self.full_unencrypted_name() - } - } - - fn full_encrypted_name(&self) -> String { - format!("{}/crypt/{}", self.pool_name, self.kind) - } - - fn full_unencrypted_name(&self) -> String { - format!("{}/{}", self.pool_name, self.kind) - } -} - #[derive(Debug, thiserror::Error)] pub enum DatasetError { #[error("Cannot open {path} due to {error}")] @@ -431,6 +298,7 @@ pub(crate) async fn ensure_zpool_has_datasets( let encryption_details = None; let size_details = Some(SizeDetails { quota: dataset.quota, + reservation: None, compression: dataset.compression, }); Zfs::ensure_filesystem( @@ -577,7 +445,7 @@ async fn ensure_zpool_dataset_is_encrypted( zpool_name: &ZpoolName, unencrypted_dataset: &str, ) -> Result<(), DatasetEncryptionMigrationError> { - let Ok(kind) = DatasetType::from_str(&unencrypted_dataset) else { + let Ok(kind) = DatasetKind::from_str(&unencrypted_dataset) else { info!(log, "Unrecognized dataset kind"); return Ok(()); }; @@ -818,7 +686,7 @@ mod test { #[test] fn serialize_dataset_name() { let pool = ZpoolName::new_internal(ZpoolUuid::new_v4()); - let kind = DatasetType::Crucible; + let kind = DatasetKind::Crucible; let name = DatasetName::new(pool, kind); serde_json::to_string(&name).unwrap(); } diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index 4c5582fd79..988f7f363a 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -4,11 +4,12 @@ //! Storage related errors -use crate::dataset::{DatasetError, DatasetName}; +use crate::dataset::DatasetError; use crate::disk::DiskError; use camino::Utf8PathBuf; use omicron_common::api::external::ByteCountRangeError; use omicron_common::api::external::Generation; +use omicron_common::disk::DatasetName; use uuid::Uuid; #[derive(thiserror::Error, Debug)] @@ -83,6 +84,15 @@ pub enum Error { current: Generation, }, + #[error("Invalid configuration (UUID mismatch in arguments)")] + ConfigUuidMismatch, + + #[error("Dataset configuration out-of-date (asked for {requested}, but latest is {current})")] + DatasetConfigurationOutdated { requested: Generation, current: Generation }, + + #[error("Dataset configuration changed for the same generation number: {generation}")] + DatasetConfigurationChanged { generation: Generation }, + #[error("Failed to update ledger in internal storage")] Ledger(#[from] omicron_common::ledger::Error), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 3cbf00530a..88e1bbaa34 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -7,7 +7,7 @@ use std::collections::HashSet; use crate::config::MountConfig; -use crate::dataset::{DatasetName, CONFIG_DATASET}; +use crate::dataset::CONFIG_DATASET; use crate::disk::RawDisk; use crate::error::Error; use crate::resources::{AllDisks, StorageResources}; @@ -18,11 +18,14 @@ use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::{ - DiskIdentity, DiskVariant, DisksManagementResult, + DatasetConfig, DatasetManagementStatus, DatasetName, DatasetsConfig, + DatasetsManagementResult, DiskIdentity, DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_common::ledger::Ledger; -use slog::{info, o, warn, Logger}; +use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::GenericUuid; +use slog::{error, info, o, warn, Logger}; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use 
tokio::time::{interval, Duration, MissedTickBehavior}; @@ -62,6 +65,9 @@ const SYNCHRONIZE_INTERVAL: Duration = Duration::from_secs(10); // The filename of the ledger storing physical disk info const DISKS_LEDGER_FILENAME: &str = "omicron-physical-disks.json"; +// The filename of the ledger storing dataset info +const DATASETS_LEDGER_FILENAME: &str = "omicron-datasets.json"; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum StorageManagerState { // We know that any attempts to manage disks will fail, as the key manager @@ -114,6 +120,16 @@ pub(crate) enum StorageRequest { tx: DebugIgnore<oneshot::Sender<Result<(), Error>>>, }, + DatasetsEnsure { + config: DatasetsConfig, + tx: DebugIgnore< + oneshot::Sender<Result<DatasetsManagementResult, Error>>, + >, + }, + DatasetsList { + tx: DebugIgnore<oneshot::Sender<Result<DatasetsConfig, Error>>>, + }, + // Requests to explicitly manage or stop managing a set of devices OmicronPhysicalDisksEnsure { config: OmicronPhysicalDisksConfig, tx: DebugIgnore< @@ -240,6 +256,31 @@ impl StorageHandle { rx.map(|result| result.unwrap()) } + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, Error> { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DatasetsEnsure { config, tx: tx.into() }) + .await + .unwrap(); + + rx.await.unwrap() + } + + /// Reads the last value written to storage by + /// [Self::datasets_ensure]. + pub async fn datasets_config_list(&self) -> Result<DatasetsConfig, Error> { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DatasetsList { tx: tx.into() }) + .await + .unwrap(); + + rx.await.unwrap() + } + pub async fn omicron_physical_disks_ensure( + &self, + config: OmicronPhysicalDisksConfig, @@ -322,6 +363,10 @@ impl StorageHandle { rx.await.unwrap() } + // TODO(https://github.com/oxidecomputer/omicron/issues/6043): + // + // Deprecate usage of this function, prefer to call "datasets_ensure" + // and ask for the set of all datasets from Nexus. pub async fn upsert_filesystem( &self, dataset_id: Uuid, @@ -428,6 +473,12 @@ self.ensure_using_exactly_these_disks(raw_disks).await; let _ = tx.0.send(Ok(())); } + StorageRequest::DatasetsEnsure { config, tx } => { + let _ = tx.0.send(self.datasets_ensure(config).await); + } + StorageRequest::DatasetsList { tx } => { + let _ = tx.0.send(self.datasets_config_list().await); + } StorageRequest::OmicronPhysicalDisksEnsure { config, tx } => { let _ = tx.0.send(self.omicron_physical_disks_ensure(config).await); @@ -485,6 +536,10 @@ impl StorageManager { ); } + // Sled Agents can remember which disks they need to manage by reading + // a configuration file from the M.2s. + // + // This function returns the paths to those configuration files. async fn all_omicron_disk_ledgers(&self) -> Vec<Utf8PathBuf> { self.resources .disks() .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .collect() } @@ -494,6 +549,19 @@ + // Sled Agents can remember which datasets they need to manage by reading + // a configuration file from the M.2s. + // + // This function returns the paths to those configuration files. + async fn all_omicron_dataset_ledgers(&self) -> Vec<Utf8PathBuf> { + self.resources + .disks() + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(DATASETS_LEDGER_FILENAME)) + .collect() + } + // Manages a newly detected disk that has been attached to this sled. // For U.2s: we update our inventory.
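Aside: the ledger handling added in the next hunk enforces the same generation discipline as the existing disk path -- a request carrying an older generation is refused, and a request that reuses the current generation must match the ledgered contents exactly. A minimal, self-contained sketch of that acceptance rule follows; it is illustrative only, not part of the diff (the name accept_new_config is made up, and u64/&str stand in for omicron's Generation and DatasetsConfig types):

// Sketch of the generation-checked acceptance rule, under the
// assumptions named above.
fn accept_new_config(
    ledgered: Option<(u64, &str)>,
    requested: (u64, &str),
) -> Result<(), String> {
    let (req_gen, req_cfg) = requested;
    match ledgered {
        // No prior request has ever been ledgered: accept anything.
        None => Ok(()),
        // A stale generation is refused outright, so a lagging caller
        // cannot roll the sled back to an older dataset layout.
        Some((cur_gen, _)) if req_gen < cur_gen => Err(format!(
            "outdated: requested generation {req_gen}, current {cur_gen}"
        )),
        // Reusing the current generation must mean identical contents;
        // mutating the config without a generation bump is an error.
        Some((cur_gen, cur_cfg)) if req_gen == cur_gen && req_cfg != cur_cfg => {
            Err(format!("contents changed at generation {cur_gen}"))
        }
        // Idempotent retry, or a genuinely newer generation: accept.
        Some(_) => Ok(()),
    }
}

fn main() {
    assert!(accept_new_config(Some((2, "a")), (2, "a")).is_ok()); // retry
    assert!(accept_new_config(Some((2, "a")), (3, "b")).is_ok()); // newer
    assert!(accept_new_config(Some((2, "a")), (1, "a")).is_err()); // stale
    assert!(accept_new_config(Some((2, "a")), (2, "b")).is_err()); // mutated
}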
@@ -545,9 +613,11 @@ impl StorageManager { self.resources.insert_or_update_disk(raw_disk).await } - async fn load_ledger(&self) -> Option<Ledger<OmicronPhysicalDisksConfig>> { + async fn load_disks_ledger( + &self, + ) -> Option<Ledger<OmicronPhysicalDisksConfig>> { let ledger_paths = self.all_omicron_disk_ledgers().await; - let log = self.log.new(o!("request" => "load_ledger")); + let log = self.log.new(o!("request" => "load_disks_ledger")); let maybe_ledger = Ledger::<OmicronPhysicalDisksConfig>::new( &log, ledger_paths.clone(), @@ -579,7 +649,7 @@ // Now that we're actually able to unpack U.2s, attempt to load the // set of disks which we previously stored in the ledger, if one // existed. - let ledger = self.load_ledger().await; + let ledger = self.load_disks_ledger().await; if let Some(ledger) = ledger { info!(self.log, "Setting StorageResources state to match ledger"); @@ -591,9 +661,160 @@ info!(self.log, "KeyManager ready, but no ledger detected"); } + // We don't load any configuration for datasets, since we aren't + // currently storing any dataset information in-memory. + // + // If we ever wanted to do so, however, we could load that information + // here. + Ok(()) } + async fn datasets_ensure( + &mut self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, Error> { + let log = self.log.new(o!("request" => "datasets_ensure")); + + // As a small input check, confirm that each UUID key in the input map + // matches the ID of its DatasetConfig. + // + // The dataset configs are sorted by UUID so they always appear in the + // same order, but this check prevents adding an entry of: + // - (UUID: X, Config(UUID: Y)), for X != Y + if !config.datasets.iter().all(|(id, config)| *id == config.id) { + return Err(Error::ConfigUuidMismatch); + } + + // We rely on the schema being stable across reboots -- see + // "test_datasets_schema" below, which guards that property. + let ledger_paths = self.all_omicron_dataset_ledgers().await; + let maybe_ledger = + Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await; + + let mut ledger = match maybe_ledger { + Some(ledger) => { + info!( + log, + "Comparing 'requested datasets' to ledger on internal storage" + ); + let ledger_data = ledger.data(); + if config.generation < ledger_data.generation { + warn!( + log, + "Request looks out-of-date compared to prior request"; + "requested_generation" => ?config.generation, + "ledger_generation" => ?ledger_data.generation, + ); + return Err(Error::DatasetConfigurationOutdated { + requested: config.generation, + current: ledger_data.generation, + }); + } else if config.generation == ledger_data.generation { + info!( + log, + "Requested generation number matches prior request", + ); + + if ledger_data != &config { + error!( + log, + "Requested configuration changed (with the same generation)"; + "generation" => ?config.generation + ); + return Err(Error::DatasetConfigurationChanged { + generation: config.generation, + }); + } + } else { + info!( + log, + "Request looks newer than prior requests"; + "requested_generation" => ?config.generation, + "ledger_generation" => ?ledger_data.generation, + ); + } + ledger + } + None => { + info!(log, "No previously-stored 'requested datasets', creating new ledger"); + Ledger::<DatasetsConfig>::new_with( + &log, + ledger_paths.clone(), + DatasetsConfig::default(), + ) + } + }; + + let result = self.datasets_ensure_internal(&log, &config).await; + + let ledger_data = ledger.data_mut(); + if *ledger_data == config { + return Ok(result); + } + *ledger_data = config; + ledger.commit().await?; + + Ok(result) + } + + // Attempts to ensure that each dataset exists.
+ // + // Does not return an error, because the [DatasetsManagementResult] type + // includes details about all possible errors that may occur at + // per-dataset granularity. + async fn datasets_ensure_internal( + &mut self, + log: &Logger, + config: &DatasetsConfig, + ) -> DatasetsManagementResult { + let mut status = vec![]; + for dataset in config.datasets.values() { + status.push(self.dataset_ensure_internal(log, dataset).await); + } + DatasetsManagementResult { status } + } + + async fn dataset_ensure_internal( + &mut self, + log: &Logger, + config: &DatasetConfig, + ) -> DatasetManagementStatus { + let log = log.new(o!("name" => config.name.full_name())); + info!(log, "Ensuring dataset"); + let mut status = DatasetManagementStatus { + dataset_name: config.name.clone(), + err: None, + }; + + if let Err(err) = self.ensure_dataset(config).await { + warn!(log, "Failed to ensure dataset"; "dataset" => ?status.dataset_name, "err" => ?err); + status.err = Some(err.to_string()); + }; + + status + } + + // Lists datasets that this sled is configured to use. + async fn datasets_config_list(&mut self) -> Result<DatasetsConfig, Error> { + let log = self.log.new(o!("request" => "datasets_config_list")); + + let ledger_paths = self.all_omicron_dataset_ledgers().await; + let maybe_ledger = + Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await; + + match maybe_ledger { + Some(ledger) => { + info!(log, "Found ledger on internal storage"); + return Ok(ledger.data().clone()); + } + None => { + info!(log, "No ledger detected on internal storage"); + return Err(Error::LedgerNotFound); + } + } + } + // Makes an U.2 disk managed by the control plane within [`StorageResources`]. async fn omicron_physical_disks_ensure( &mut self, config: OmicronPhysicalDisksConfig, ) -> Result<DisksManagementResult, Error> { @@ -765,6 +986,77 @@ impl StorageManager { } } + // Ensures a dataset exists within a zpool, according to `config`. + async fn ensure_dataset( + &mut self, + config: &DatasetConfig, + ) -> Result<(), Error> { + info!(self.log, "ensure_dataset"; "config" => ?config); + + // We can only place datasets within managed disks. + // If a disk is attached to this sled, but not a part of the Control + // Plane, it is treated as "not found" for dataset placement. + if !self + .resources + .disks() + .iter_managed() + .any(|(_, disk)| disk.zpool_name() == config.name.pool()) + { + return Err(Error::ZpoolNotFound(format!( + "{}", + config.name.pool(), + ))); + } + + let zoned = config.name.dataset().zoned(); + let mountpoint_path = if zoned { + Utf8PathBuf::from("/data") + } else { + config.name.pool().dataset_mountpoint( + &Utf8PathBuf::from("/"), + &config.name.dataset().to_string(), + ) + }; + let mountpoint = Mountpoint::Path(mountpoint_path); + + let fs_name = &config.name.full_name(); + let do_format = true; + + // The "crypt" dataset needs these details, but should already exist + // by the time we're creating datasets inside. + let encryption_details = None; + let size_details = Some(illumos_utils::zfs::SizeDetails { + quota: config.quota, + reservation: config.reservation, + compression: config.compression, + }); + Zfs::ensure_filesystem( + fs_name, + mountpoint, + zoned, + do_format, + encryption_details, + size_details, + None, + )?; + // Ensure the dataset has a usable UUID.
+ if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { + if let Ok(id) = id_str.parse::<DatasetUuid>() { + if id != config.id { + return Err(Error::UuidMismatch { + name: Box::new(config.name.clone()), + old: id.into_untyped_uuid(), + new: config.id.into_untyped_uuid(), + }); + } + return Ok(()); + } + } + Zfs::set_oxide_value(&fs_name, "uuid", &config.id.to_string())?; + + Ok(()) + } + // Attempts to add a dataset within a zpool, according to `request`. async fn add_dataset( &mut self, @@ -824,16 +1116,19 @@ /// systems. #[cfg(all(test, target_os = "illumos"))] mod tests { - use crate::dataset::DatasetType; use crate::disk::RawSyntheticDisk; use crate::manager_test_harness::StorageManagerTestHarness; use super::*; use camino_tempfile::tempdir_in; + use omicron_common::api::external::Generation; + use omicron_common::disk::CompressionAlgorithm; + use omicron_common::disk::DatasetKind; use omicron_common::disk::DiskManagementError; use omicron_common::ledger; use omicron_test_utils::dev::test_setup_log; use sled_hardware::DiskFirmware; + use std::collections::BTreeMap; use std::sync::atomic::Ordering; use uuid::Uuid; @@ -1299,7 +1594,7 @@ let dataset_id = Uuid::new_v4(); let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); let dataset_name = - DatasetName::new(zpool_name.clone(), DatasetType::Crucible); + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); harness .handle() .upsert_filesystem(dataset_id, dataset_name) @@ -1309,6 +1604,86 @@ harness.cleanup().await; logctx.cleanup_successful(); } + + #[tokio::test] + async fn ensure_datasets() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ensure_datasets"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. + harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create a dataset on the newly formatted U.2 + let id = DatasetUuid::new_v4(); + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let name = DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + let datasets = BTreeMap::from([( + id, + DatasetConfig { + id, + name, + compression: CompressionAlgorithm::Off, + quota: None, + reservation: None, + }, + )]); + // "Generation = 1" is reserved as "no requests seen yet", so we jump + // past it. + let generation = Generation::new().next(); + let mut config = DatasetsConfig { generation, datasets }; + + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + // List datasets, expect to see what we just created + let observed_config = + harness.handle().datasets_config_list().await.unwrap(); + assert_eq!(config, observed_config); + + // Calling "datasets_ensure" with the same input should succeed.
+ let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + let current_config_generation = config.generation; + let next_config_generation = config.generation.next(); + + // Calling "datasets_ensure" with an old generation should fail + config.generation = Generation::new(); + let err = + harness.handle().datasets_ensure(config.clone()).await.unwrap_err(); + assert!(matches!(err, Error::DatasetConfigurationOutdated { .. })); + + // Likewise, calling it with different input but the same generation + // number should fail. + config.generation = current_config_generation; + config.datasets.values_mut().next().unwrap().reservation = Some(1024); + let err = + harness.handle().datasets_ensure(config.clone()).await.unwrap_err(); + assert!(matches!(err, Error::DatasetConfigurationChanged { .. })); + + // If we bump the generation number while making a change, updated + // configs will work. + config.generation = next_config_generation; + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + harness.cleanup().await; + logctx.cleanup_successful(); + } } #[cfg(test)] @@ -1322,4 +1697,13 @@ mod test { &serde_json::to_string_pretty(&schema).unwrap(), ); } + + #[test] + fn test_datasets_schema() { + let schema = schemars::schema_for!(DatasetsConfig); + expectorate::assert_contents( + "../schema/omicron-datasets.json", + &serde_json::to_string_pretty(&schema).unwrap(), + ); + } }
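To make the new encoding concrete, a short round-trip of DatasetKind is sketched below. This is not part of the diff: it assumes only the impls introduced above (Display, FromStr, and the Display-backed serde), serde_json as a dependency, and the zone name "oxz_switch" is purely illustrative.

use std::str::FromStr;
use omicron_common::api::internal::shared::DatasetKind;

fn main() {
    // Display doubles as the durable dataset-name component, which is
    // why Cockroach renders as "cockroachdb" for historical reasons.
    assert_eq!(DatasetKind::Cockroach.to_string(), "cockroachdb");

    // The data-carrying variant parses from its "zone/"-prefixed form.
    let kind = DatasetKind::from_str("zone/oxz_switch").unwrap();
    assert_eq!(kind, DatasetKind::Zone { name: "oxz_switch".to_string() });

    // Serde is defined via Display/FromStr, so the JSON form is just the
    // quoted string -- the wire format and the on-disk name stay in
    // lockstep.
    assert_eq!(serde_json::to_string(&kind).unwrap(), "\"zone/oxz_switch\"");
}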