[sled agent] API to manage datasets explicitly #6144

Merged: 38 commits from branch explicit-datasets into main, Aug 28, 2024
Changes shown from 18 of the 38 commits

Commits (38)
5c2e5b0  [wip] Starting sled agent API to manage datasets explicitly (smklein, Jul 18, 2024)
1709d80  Merge branch 'main' into explicit-datasets (smklein, Jul 22, 2024)
a80313d  list implementation (smklein, Jul 22, 2024)
7193d1b  Tests (smklein, Jul 22, 2024)
9464cc1  schemas n stuff (smklein, Jul 22, 2024)
fb23666  Merge branch 'main' into explicit-datasets (smklein, Jul 23, 2024)
29c4eb7  Merge branch 'main' into explicit-datasets (smklein, Jul 25, 2024)
75f006b  Merge branch 'main' into explicit-datasets (smklein, Jul 26, 2024)
24d93a8  Fix mismerge (smklein, Jul 26, 2024)
178e20e  Clippy and helios (smklein, Jul 26, 2024)
cf3f35c  openapi (smklein, Jul 26, 2024)
11f49fb  More broad support for datasets (smklein, Jul 30, 2024)
b999bf7  make it a schema change (smklein, Jul 30, 2024)
fac6456  Merge branch 'main' into explicit-datasets (smklein, Jul 30, 2024)
fdf0644  queries (smklein, Jul 30, 2024)
a042439  Merge branch 'main' into explicit-datasets (smklein, Jul 31, 2024)
9c27213  Merge branch 'main' into explicit-datasets (smklein, Aug 7, 2024)
c562125  Optional properties still get set (smklein, Aug 7, 2024)
c7a5adf  clippy (smklein, Aug 7, 2024)
181507c  fmt (smklein, Aug 7, 2024)
8e1df07  str not string (smklein, Aug 7, 2024)
93134c2  cockroachdb, not cockroach_db (smklein, Aug 7, 2024)
d88d541  generation numbers (smklein, Aug 7, 2024)
95ebbb8  review feedback (smklein, Aug 7, 2024)
994757e  Serialize via string, not tag (smklein, Aug 7, 2024)
7156ba0  safer API for Dataset::new (smklein, Aug 8, 2024)
14a2dc4  merge main into branch, manually impl JsonSchema (sunshowers, Aug 8, 2024)
aa60254  update schema JSONs, add replace (sunshowers, Aug 8, 2024)
67c5d7e  Merge branch 'main' into explicit-datasets (smklein, Aug 10, 2024)
927efd1  Merge remote-tracking branch 'origin/explicit-datasets' into explicit… (smklein, Aug 10, 2024)
3e14cc4  Fix schema (smklein, Aug 10, 2024)
40f93ae  Merge branch 'main' into explicit-datasets (smklein, Aug 22, 2024)
077908d  Fix clickhouseserver datasetkind serialization (smklein, Aug 26, 2024)
a600494  Merge branch 'main' into explicit-datasets (smklein, Aug 26, 2024)
0a63161  Merge branch 'main' into explicit-datasets (smklein, Aug 28, 2024)
c9f170e  Make CompressionAlgorithm strongly typed (smklein, Aug 28, 2024)
f5bc35a  Fixing helios-only tests, clippy (smklein, Aug 28, 2024)
474ec13  schemas (smklein, Aug 28, 2024)
105 changes: 101 additions & 4 deletions common/src/api/internal/shared.rs
@@ -704,33 +704,130 @@ pub struct ResolvedVpcRouteSet {

/// Describes the purpose of the dataset.
#[derive(
-    Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq,
+    Debug,
+    Serialize,
+    Deserialize,
+    JsonSchema,
+    Clone,
+    PartialEq,
+    Eq,
+    Ord,
+    PartialOrd,
+    Hash,
)]
-#[serde(rename_all = "snake_case")]
+#[serde(tag = "type", rename_all = "snake_case")]
Collaborator: Will adding this tag break any serialization? I'm thinking of code which attempts to deserialize these values, but which is not necessarily running against the same version of the code here.

Contributor: I'm wondering if we should just always require tag in serde impls to ensure forward compatibility, in case any particular variant starts having associated data in the future. (The requirement to use a tag, if any variant has associated data, is imposed by the openapi tooling.)

Collaborator (author): I went down the route of "serialize/deserialize via string", as @sunshowers recommended later.
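
As a minimal sketch of that string-based approach (assuming the Display and FromStr impls shown later in this diff, and that the derived Serialize/Deserialize and serde tag above are dropped in favor of manual impls):

// Sketch only: round-trip DatasetKind through its string form, so the wire
// format matches the on-disk ZFS dataset name component.
use std::str::FromStr;

use serde::{de::Error as _, Deserialize, Deserializer, Serialize, Serializer};

impl Serialize for DatasetKind {
    fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
        // Reuse Display, keeping serialization and dataset naming in lockstep.
        s.serialize_str(&self.to_string())
    }
}

impl<'de> Deserialize<'de> for DatasetKind {
    fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
        let s = String::deserialize(d)?;
        // Reuse FromStr, which also accepts legacy spellings.
        Self::from_str(&s).map_err(D::Error::custom)
    }
}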

pub enum DatasetKind {
-    Crucible,
    // Durable datasets for zones

    // This renaming exists for backwards compatibility -- this enum variant
    // was serialized to "all-zones-request" as "cockroach_db" and should
    // stay that way, unless we perform an explicit schema change.
    #[serde(rename = "cockroach_db")]
    Cockroach,
+    Crucible,
    Clickhouse,
    ClickhouseKeeper,
    ExternalDns,
    InternalDns,

    // Zone filesystems
    ZoneRoot,
    Zone {
        name: String,
    },

    // Other datasets
    Debug,
}

impl DatasetKind {
Collaborator (author): A chunk of this implementation was moved from sled-storage/src/dataset.rs.

History lesson: we used to have types defined that would be usable by Nexus (common/src/api/internal), and types that were internal to the sled agent (sled-storage/src/dataset.rs) for defining dataset types/kinds.

This PR merges both concepts, but tweaks some names to avoid changing any schemas that are saved on M.2s.

    pub fn dataset_should_be_encrypted(&self) -> bool {
        match self {
            // We encrypt all datasets except Crucible.
            //
            // Crucible already performs encryption internally, and we
            // avoid double-encryption.
            DatasetKind::Crucible => false,
            _ => true,
        }
    }

    /// Returns true if this dataset is delegated to a non-global zone.
    pub fn zoned(&self) -> bool {
        use DatasetKind::*;
        match self {
            Cockroach | Crucible | Clickhouse | ClickhouseKeeper
            | ExternalDns | InternalDns => true,
            ZoneRoot | Zone { .. } | Debug => false,
        }
    }

    /// Returns the zone name, if this is a dataset for a zone filesystem.
    ///
    /// Otherwise, returns "None".
    pub fn zone_name(&self) -> Option<String> {
        if let DatasetKind::Zone { name } = self {
            Some(name.clone())
        } else {
            None
        }
    }
}

// Be cautious updating this implementation:
//
// - It should align with [DatasetKind::FromStr], below
// - The strings here comprise the dataset name, stored durably on-disk
impl fmt::Display for DatasetKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use DatasetKind::*;
        let s = match self {
            Crucible => "crucible",
-            Cockroach => "cockroach",
+            Cockroach => "cockroachdb",
Collaborator: When is the Display implementation used and when is a value serialized? I'm wondering why this variant is "cockroachdb" here but "cockroach_db" when serialized.

Member: +1, I would have a slight preference for the Display and Serialize strings being identical...

Contributor: +1 as well. For ZoneKind, which is related, we already have 4 different string representations X_X:

smklein (author, Aug 7, 2024): So my mishaps with naming here were intended to keep backwards compatibility between:

  • Names for cockroach we have in deployed systems (e.g., the zfs list-ed name is cockroachdb)
  • Names for cockroach we have stored on-disk in configuration files (e.g., grepping for cockroach_db in the schema directory of Omicron shows that it's used in e.g., all-zones-requests.json, which is used for bootstrapping "what zones get auto-launched when we reboot")

To be clear, I think I misinterpreted this in my original PR.

I was attempting to merge DatasetKind (from this file) and DatasetType (from sled-storage/src/dataset.rs) without breaking backwards compatibility. It was in the all-zones-requests.json configuration file where I saw the underscored "cockroach_db" name and tried to keep compatibility.

HOWEVER, upon closer inspection, the cockroach_db name actually comes from OmicronZoneType in nexus-sled-agent-shared/src/inventory.rs, so I should be able to just completely stick with the cockroachdb name (no underscores) in this DatasetKind variant.

Updated in 93134c2 to just use cockroachdb in serialize + display.

            Clickhouse => "clickhouse",
            ClickhouseKeeper => "clickhouse_keeper",
            ExternalDns => "external_dns",
            InternalDns => "internal_dns",
            ZoneRoot => "zone",
            Zone { name } => {
                write!(f, "zone/{}", name)?;
                return Ok(());
            }
            Debug => "debug",
        };
        write!(f, "{}", s)
    }
}

#[derive(Debug, thiserror::Error)]
pub enum DatasetKindParseError {
    #[error("Dataset unknown: {0}")]
    UnknownDataset(String),
}

impl FromStr for DatasetKind {
    type Err = DatasetKindParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use DatasetKind::*;
        let kind = match s {
            "crucible" => Crucible,
            "cockroachdb" | "cockroach_db" => Cockroach,
            "clickhouse" => Clickhouse,
            "clickhouse_keeper" => ClickhouseKeeper,
            "external_dns" => ExternalDns,
            "internal_dns" => InternalDns,
            _ => {
                return Err(DatasetKindParseError::UnknownDataset(
                    s.to_string(),
                ))
            }
        };
        Ok(kind)
    }
}
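
To make the "keep Display and FromStr aligned" caution above mechanically checkable, a round-trip test along these lines would catch drift (a sketch, not part of the PR; the zone and debug variants are excluded because this FromStr does not parse them):

#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    #[test]
    fn dataset_kind_round_trips_through_display() {
        // Every fixed-string variant must parse back to itself.
        let kinds = [
            DatasetKind::Crucible,
            DatasetKind::Cockroach,
            DatasetKind::Clickhouse,
            DatasetKind::ClickhouseKeeper,
            DatasetKind::ExternalDns,
            DatasetKind::InternalDns,
        ];
        for kind in kinds {
            let s = kind.to_string();
            assert_eq!(DatasetKind::from_str(&s).unwrap(), kind);
        }
        // The legacy underscored spelling is still accepted on parse.
        assert_eq!(
            DatasetKind::from_str("cockroach_db").unwrap(),
            DatasetKind::Cockroach
        );
    }
}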

/// Identifiers for a single sled.
///
/// This is intended primarily to be used in timeseries, to identify
(diff truncated)
131 changes: 130 additions & 1 deletion common/src/disk.rs
@@ -4,15 +4,20 @@

//! Disk related types shared among crates

use omicron_uuid_kinds::DatasetUuid;
use omicron_uuid_kinds::ZpoolUuid;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use crate::{
-    api::external::Generation, ledger::Ledgerable, zpool_name::ZpoolKind,
+    api::external::Generation,
+    ledger::Ledgerable,
+    zpool_name::{ZpoolKind, ZpoolName},
};

pub use crate::api::internal::shared::DatasetKind;

#[derive(
    Clone,
    Debug,
(diff truncated)

@@ -69,6 +74,130 @@ impl OmicronPhysicalDisksConfig {
    }
}

#[derive(
    Debug,
    PartialEq,
    Eq,
    Hash,
    Serialize,
    Deserialize,
    Clone,
    JsonSchema,
    PartialOrd,
    Ord,
)]
pub struct DatasetName {
    // A unique identifier for the Zpool on which the dataset is stored.
    pool_name: ZpoolName,
    // A name for the dataset within the Zpool.
    kind: DatasetKind,
}

impl DatasetName {
    pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self {
        Self { pool_name, kind }
    }

    pub fn pool(&self) -> &ZpoolName {
        &self.pool_name
    }

    pub fn dataset(&self) -> &DatasetKind {
        &self.kind
    }

    /// Returns the full name of the dataset, as would be returned from
    /// "zfs get" or "zfs list".
    ///
    /// If this dataset should be encrypted, this automatically adds the
    /// "crypt" dataset component.
    pub fn full_name(&self) -> String {
        // Currently, we encrypt all datasets except Crucible.
        //
        // Crucible already performs encryption internally, and we
        // avoid double-encryption.
        if self.kind.dataset_should_be_encrypted() {
            self.full_encrypted_name()
        } else {
            self.full_unencrypted_name()
        }
    }

    fn full_encrypted_name(&self) -> String {
        format!("{}/crypt/{}", self.pool_name, self.kind)
    }

    fn full_unencrypted_name(&self) -> String {
        format!("{}/{}", self.pool_name, self.kind)
    }
}
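
As a worked example of the naming scheme (a sketch under assumptions: ZpoolName::new_external and ZpoolUuid::new_v4 are used here as plausible constructors, and an external pool is taken to render as oxp_<uuid>):

// Sketch: how full_name() composes, for a hypothetical external zpool.
fn dataset_name_examples() {
    let pool = ZpoolName::new_external(ZpoolUuid::new_v4());

    // Crucible encrypts internally, so it skips the "crypt" component:
    //   oxp_<uuid>/crucible
    let crucible = DatasetName::new(pool.clone(), DatasetKind::Crucible);
    assert!(!crucible.full_name().contains("/crypt/"));

    // Everything else nests under the encrypted "crypt" dataset:
    //   oxp_<uuid>/crypt/cockroachdb
    let cockroach = DatasetName::new(pool, DatasetKind::Cockroach);
    assert!(cockroach.full_name().contains("/crypt/"));
}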

/// Configuration information necessary to request a single dataset
#[derive(
    Clone,
    Debug,
    Deserialize,
    Serialize,
    JsonSchema,
    PartialEq,
    Eq,
    Hash,
    PartialOrd,
    Ord,
)]
pub struct DatasetConfig {
    /// The UUID of the dataset being requested
    pub id: DatasetUuid,

    /// The dataset's name
    pub name: DatasetName,

    /// The compression mode to be supplied, if any
    pub compression: Option<String>,

    /// The upper bound on the amount of storage used by this dataset
    pub quota: Option<usize>,

    /// The lower bound on the amount of storage usable by this dataset
    pub reservation: Option<usize>,
}

#[derive(
    Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash,
)]
pub struct DatasetsConfig {
    /// generation number of this configuration
    ///
    /// This generation number is owned by the control plane (i.e., RSS or
    /// Nexus, depending on whether RSS-to-Nexus handoff has happened). It
    /// should not be bumped within Sled Agent.
    ///
    /// Sled Agent rejects attempts to set the configuration to a generation
    /// older than the one it's currently running.
    ///
    /// Note that "Generation::new()", AKA, the first generation number,
    /// is reserved for "no datasets". This is the default configuration
    /// for a sled before any requests have been made.
    pub generation: Generation,

    pub datasets: Vec<DatasetConfig>,
}

impl Default for DatasetsConfig {
    fn default() -> Self {
        Self { generation: Generation::new(), datasets: vec![] }
    }
}
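
The "reject older generations" rule in the doc comment above reduces to a comparison like the following on the Sled Agent side (a hypothetical sketch; the function and error names are invented for illustration):

// Sketch of the generation check described above; names are illustrative.
#[derive(Debug, thiserror::Error)]
#[error("requested generation ({requested:?}) is older than current ({current:?})")]
pub struct StaleGenerationError {
    current: Generation,
    requested: Generation,
}

fn check_requested_generation(
    current: &DatasetsConfig,
    incoming: &DatasetsConfig,
) -> Result<(), StaleGenerationError> {
    // The control plane (RSS or Nexus) owns this number; Sled Agent only
    // verifies monotonicity and never bumps it locally.
    if incoming.generation < current.generation {
        return Err(StaleGenerationError {
            current: current.generation,
            requested: incoming.generation,
        });
    }
    Ok(())
}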

impl Ledgerable for DatasetsConfig {
    fn is_newer_than(&self, other: &Self) -> bool {
        self.generation > other.generation
    }

    // No need to do this, the generation number is provided externally.

Contributor: Hmm, as someone unfamiliar with this code I'm not quite sure what it means here -- I see that there's a generation field. Do you mean it gets updated by whatever owns this config?

Collaborator (author): Yeah, the generation field exists in the config, so we don't actually need (nor want) to change anything in the implementation of Ledgerable::generation_bump.

The "Ledgerable" trait exists to help us write a file to many disks, and assure that we'll read back the latest one, if it exists. Part of this is deciphering: if we failed halfway through a write, and we see two distinct ledgers, which should we use?

There are some configurations (basically, in the bootstore only now) that want to bump the generation number on every single write, so this just tracks "what's the latest thing that was written".

However, in this case -- and many others, for Nexus-supplied data -- we already have a generation from Nexus, and we don't need to have a duplicate generation number for writing things to disk. As long as the data is unmodified from Nexus -> Sled Agent -> Disk, we can safely re-use this field.

    fn generation_bump(&mut self) {}
}
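
For context on how is_newer_than gets exercised, a sketch of the read-back selection across multiple ledger copies (a hypothetical helper; the real logic lives in omicron's ledger module):

// Sketch: pick the newest of the config copies read back from the M.2s,
// using Ledgerable::is_newer_than. Ties keep the first copy seen.
fn pick_latest(copies: Vec<DatasetsConfig>) -> Option<DatasetsConfig> {
    let mut latest: Option<DatasetsConfig> = None;
    for candidate in copies {
        match &latest {
            Some(best) if !candidate.is_newer_than(best) => {}
            _ => latest = Some(candidate),
        }
    }
    latest
}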

/// Uniquely identifies a disk.
#[derive(
    Debug,
(diff truncated)