[sled agent] API to manage datasets explicitly #6144
Changes from 25 commits
@@ -10,13 +10,14 @@ use crate::{
 };
 use oxnet::{IpNet, Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
+use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
 use std::{
     collections::{HashMap, HashSet},
     fmt,
     net::{IpAddr, Ipv4Addr, Ipv6Addr},
     str::FromStr,
 };
+use strum::EnumCount;
 use uuid::Uuid;

 /// The type of network interface
@@ -704,33 +705,142 @@ pub struct ResolvedVpcRouteSet {

 /// Describes the purpose of the dataset.
 #[derive(
-    Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq,
+    Debug, JsonSchema, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, EnumCount,
 )]
-#[serde(rename_all = "snake_case")]
+#[serde(tag = "type", rename_all = "snake_case")]
 pub enum DatasetKind {
-    Crucible,
+    // Durable datasets for zones
+    #[serde(rename = "cockroachdb")]
+    Cockroach,
+    Crucible,
+    Clickhouse,
+    ClickhouseKeeper,
+    ExternalDns,
+    InternalDns,
+
+    // Zone filesystems
+    ZoneRoot,
+    Zone {
+        name: String,
+    },
+
+    // Other datasets
+    Debug,
 }
impl Serialize for DatasetKind {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(&self.to_string())
    }
}

[Review comment] Oh hmm, I think you'll also want to update the JsonSchema implementation.

[Reply] Just to confirm -- what about the JsonSchema implementation should we update? It's currently being derived, and is used as part of internal APIs, but I don't think it needs to match the serialization + to/from string implementations.

[Reply] Updated the PR with a fixed JSON schema. Basically the issue was that JSON schemas are meant to be a description of the serialization format, and they were out of sync since we'd manually implemented Serialize and Deserialize. I've changed the schema to be just a string.

impl<'de> Deserialize<'de> for DatasetKind {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let s = String::deserialize(deserializer)?;
        s.parse().map_err(de::Error::custom)
    }
}
[Author comment] A chunk of this implementation was moved from … History lesson: We used to have types defined that would be usable by Nexus (…). This PR merges both concepts, but tweaks some names to avoid changing any schemas that are saved on M.2s.

impl DatasetKind {
    pub fn dataset_should_be_encrypted(&self) -> bool {
        match self {
            // We encrypt all datasets except Crucible.
            //
            // Crucible already performs encryption internally, and we
            // avoid double-encryption.
            DatasetKind::Crucible => false,
            _ => true,
        }
    }

    /// Returns true if this dataset is delegated to a non-global zone.
    pub fn zoned(&self) -> bool {
        use DatasetKind::*;
        match self {
            Cockroach | Crucible | Clickhouse | ClickhouseKeeper
            | ExternalDns | InternalDns => true,
            ZoneRoot | Zone { .. } | Debug => false,
        }
    }

    /// Returns the zone name, if this is a dataset for a zone filesystem.
    ///
    /// Otherwise, returns "None".
    pub fn zone_name(&self) -> Option<&str> {
        if let DatasetKind::Zone { name } = self {
            Some(name)
        } else {
            None
        }
    }
}
// Be cautious updating this implementation:
//
// - It should align with [DatasetKind::FromStr], below
// - The strings here comprise the dataset name, stored durably on-disk
impl fmt::Display for DatasetKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use DatasetKind::*;
        let s = match self {
            Crucible => "crucible",
-            Cockroach => "cockroach",
+            Cockroach => "cockroachdb",
            Clickhouse => "clickhouse",
            ClickhouseKeeper => "clickhouse_keeper",
            ExternalDns => "external_dns",
            InternalDns => "internal_dns",
            ZoneRoot => "zone",
            Zone { name } => {
                write!(f, "zone/{}", name)?;
                return Ok(());
            }
            Debug => "debug",
        };
        write!(f, "{}", s)
    }
}

[Review comment] When is the …

[Reply] +1, I would have a slight preference for the …

[Reply] +1 as well. For …

[Author reply] So my mishaps with naming here were intended to keep backwards compatibility between: … To be clear, I think I misinterpreted this in my original PR. I was attempting to merge … HOWEVER, upon closer inspection, the … Updated in 93134c2 to just use …
#[derive(Debug, thiserror::Error)]
pub enum DatasetKindParseError {
    #[error("Dataset unknown: {0}")]
    UnknownDataset(String),
}

impl FromStr for DatasetKind {
    type Err = DatasetKindParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use DatasetKind::*;
        let kind = match s {
            "cockroachdb" => Cockroach,
            "crucible" => Crucible,
            "clickhouse" => Clickhouse,
            "clickhouse_keeper" => ClickhouseKeeper,
            "external_dns" => ExternalDns,
            "internal_dns" => InternalDns,
            "zone" => ZoneRoot,
            "debug" => Debug,
            other => {
                if let Some(name) = other.strip_prefix("zone/") {
                    Zone { name: name.to_string() }
                } else {
                    return Err(DatasetKindParseError::UnknownDataset(
                        s.to_string(),
                    ));
                }
            }
        };
        Ok(kind)
    }
}
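The Display and FromStr implementations above double as the serde wire format, since Serialize simply calls to_string() and Deserialize calls parse(). A minimal, dependency-free sketch of the same pattern (the MiniKind type here is hypothetical, not from the PR): unit variants map to fixed strings, while the data-carrying variant round-trips through a "zone/" prefix.

```rust
use std::fmt;
use std::str::FromStr;

// Hypothetical miniature of the DatasetKind pattern: fixed variants plus
// one variant carrying data, encoded after a "zone/" separator.
#[derive(Debug, Clone, PartialEq, Eq)]
enum MiniKind {
    Crucible,
    ZoneRoot,
    Zone { name: String },
}

impl fmt::Display for MiniKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            MiniKind::Crucible => write!(f, "crucible"),
            MiniKind::ZoneRoot => write!(f, "zone"),
            // The data-carrying variant embeds its payload in the string.
            MiniKind::Zone { name } => write!(f, "zone/{}", name),
        }
    }
}

impl FromStr for MiniKind {
    type Err = String;

    // Must stay symmetric with Display, since (in the PR) serde
    // round-trips through these two impls.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "crucible" => Ok(MiniKind::Crucible),
            "zone" => Ok(MiniKind::ZoneRoot),
            other => {
                if let Some(name) = other.strip_prefix("zone/") {
                    Ok(MiniKind::Zone { name: name.to_string() })
                } else {
                    Err(format!("unknown dataset kind: {other}"))
                }
            }
        }
    }
}
```

Keeping Display and FromStr symmetric is the only invariant the manual serde impls rely on, which is why the PR's round-trip test exercises both directions for every variant.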
/// Identifiers for a single sled.
///
/// This is intended primarily to be used in timeseries, to identify

@@ -753,6 +863,7 @@ pub struct SledIdentifiers {

#[cfg(test)]
mod tests {
    use super::*;
    use crate::api::internal::shared::AllowedSourceIps;
    use oxnet::{IpNet, Ipv4Net, Ipv6Net};
    use std::net::{Ipv4Addr, Ipv6Addr};

@@ -797,4 +908,48 @@ mod tests {
            serde_json::from_str(r#"{"allow":"any"}"#).unwrap(),
        );
    }
    #[test]
    fn test_dataset_kind_serialization() {
        let kinds = [
            DatasetKind::Crucible,
            DatasetKind::Cockroach,
            DatasetKind::Clickhouse,
            DatasetKind::ClickhouseKeeper,
            DatasetKind::ExternalDns,
            DatasetKind::InternalDns,
            DatasetKind::ZoneRoot,
            DatasetKind::Zone { name: String::from("myzone") },
            DatasetKind::Debug,
        ];

        assert_eq!(kinds.len(), DatasetKind::COUNT);

        for kind in &kinds {
            // To string, from string
            let as_str = kind.to_string();
            let from_str =
                DatasetKind::from_str(&as_str).unwrap_or_else(|_| {
                    panic!("Failed to convert {kind} to and from string")
                });
            assert_eq!(
                *kind, from_str,
                "{kind} failed to convert to/from a string"
            );

            // Serialize, deserialize
            let ser = serde_json::to_string(&kind)
                .unwrap_or_else(|_| panic!("Failed to serialize {kind}"));
            let de: DatasetKind = serde_json::from_str(&ser)
                .unwrap_or_else(|_| panic!("Failed to deserialize {kind}"));
            assert_eq!(*kind, de, "{kind} failed serialization");

            // Test that serialization is equivalent to stringifying.
            assert_eq!(
                format!("\"{as_str}\""),
                ser,
                "{kind} does not match stringification/serialization"
            );
        }
    }
}
@@ -4,15 +4,21 @@

 //! Disk related types shared among crates

 use omicron_uuid_kinds::DatasetUuid;
 use omicron_uuid_kinds::ZpoolUuid;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
 use uuid::Uuid;

 use crate::{
-    api::external::Generation, ledger::Ledgerable, zpool_name::ZpoolKind,
+    api::external::Generation,
+    ledger::Ledgerable,
+    zpool_name::{ZpoolKind, ZpoolName},
 };

+pub use crate::api::internal::shared::DatasetKind;

 #[derive(
     Clone,
     Debug,
@@ -69,6 +75,130 @@ impl OmicronPhysicalDisksConfig {
     }
 }

#[derive(
    Debug,
    PartialEq,
    Eq,
    Hash,
    Serialize,
    Deserialize,
    Clone,
    JsonSchema,
    PartialOrd,
    Ord,
)]
pub struct DatasetName {
    // A unique identifier for the Zpool on which the dataset is stored.
    pool_name: ZpoolName,
    // A name for the dataset within the Zpool.
    kind: DatasetKind,
}

impl DatasetName {
    pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self {
        Self { pool_name, kind }
    }

    pub fn pool(&self) -> &ZpoolName {
        &self.pool_name
    }

    pub fn dataset(&self) -> &DatasetKind {
        &self.kind
    }

    /// Returns the full name of the dataset, as would be returned from
    /// "zfs get" or "zfs list".
    ///
    /// If this dataset should be encrypted, this automatically adds the
    /// "crypt" dataset component.
    pub fn full_name(&self) -> String {
        // Currently, we encrypt all datasets except Crucible.
        //
        // Crucible already performs encryption internally, and we
        // avoid double-encryption.
        if self.kind.dataset_should_be_encrypted() {
            self.full_encrypted_name()
        } else {
            self.full_unencrypted_name()
        }
    }

    fn full_encrypted_name(&self) -> String {
        format!("{}/crypt/{}", self.pool_name, self.kind)
    }

    fn full_unencrypted_name(&self) -> String {
        format!("{}/{}", self.pool_name, self.kind)
    }
}
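The encryption split above determines where a dataset lands in the ZFS hierarchy. As a hedged, standalone sketch of just the name-building rule (the pool string "oxp_data" is invented for illustration; the real code formats a ZpoolName and DatasetKind rather than taking strings):

```rust
// Sketch of the full_name() rule: encrypted datasets live under a
// "crypt" child of the pool, while unencrypted ones (i.e., Crucible,
// which encrypts internally) sit directly under the pool.
fn full_name(pool: &str, kind: &str, encrypted: bool) -> String {
    if encrypted {
        format!("{pool}/crypt/{kind}")
    } else {
        format!("{pool}/{kind}")
    }
}
```

The useful property is that callers never decide on the "crypt" component themselves; it falls out of DatasetKind::dataset_should_be_encrypted().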
/// Configuration information necessary to request a single dataset
#[derive(
    Clone,
    Debug,
    Deserialize,
    Serialize,
    JsonSchema,
    PartialEq,
    Eq,
    Hash,
    PartialOrd,
    Ord,
)]
pub struct DatasetConfig {
    /// The UUID of the dataset being requested
    pub id: DatasetUuid,

    /// The dataset's name
    pub name: DatasetName,

    /// The compression mode to be supplied, if any
    pub compression: Option<String>,

    /// The upper bound on the amount of storage used by this dataset
    pub quota: Option<usize>,

    /// The lower bound on the amount of storage usable by this dataset
    pub reservation: Option<usize>,
}
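For illustration, a DatasetConfig for a Crucible dataset might serialize along these lines. This is a hedged sketch, not output from the PR: the UUIDs are invented, the "oxp_<uuid>" pool-name form is an assumption about ZpoolName's string encoding, and DatasetKind appears as a plain string per the Serialize impl above.

```json
{
  "id": "1e3b1f7a-8f2d-4b9c-9f0a-2d4c5e6f7a8b",
  "name": {
    "pool_name": "oxp_6f2a7bd2-3d5e-4c91-8f0b-15e7a9c2d4e1",
    "kind": "crucible"
  },
  "compression": null,
  "quota": null,
  "reservation": null
}
```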
#[derive(
    Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash,
)]
pub struct DatasetsConfig {
    /// generation number of this configuration
    ///
    /// This generation number is owned by the control plane (i.e., RSS or
    /// Nexus, depending on whether RSS-to-Nexus handoff has happened). It
    /// should not be bumped within Sled Agent.
    ///
    /// Sled Agent rejects attempts to set the configuration to a generation
    /// older than the one it's currently running.
    ///
    /// Note that "Generation::new()", AKA, the first generation number,
    /// is reserved for "no datasets". This is the default configuration
    /// for a sled before any requests have been made.
    pub generation: Generation,

    pub datasets: BTreeMap<DatasetUuid, DatasetConfig>,
}

impl Default for DatasetsConfig {
    fn default() -> Self {
        Self { generation: Generation::new(), datasets: BTreeMap::new() }
    }
}

impl Ledgerable for DatasetsConfig {
    fn is_newer_than(&self, other: &Self) -> bool {
        self.generation > other.generation
    }

    // No need to do this, the generation number is provided externally.
    fn generation_bump(&mut self) {}
}

[Review comment] Hmm, as someone unfamiliar with this code I'm not quite sure what it means here -- I see that there's a …

[Reply] Yeah, the … The "Ledgerable" trait exists to help us write a file to many disks, and assure that we'll read back the latest one, if it exists. Part of this is deciphering: if we failed halfway through a write, and we see two distinct ledgers, which should we use? There are some configurations (basically, in the bootstore only now) that want to bump the generation number on every single write, so this just tracks "what's the latest thing that was written". However, in this case -- and many others, for Nexus-supplied data -- we already have a …
/// Uniquely identifies a disk.
#[derive(
    Debug,

[Review comment] Will adding this tag break any serialization? I'm thinking of code which attempts to deserialize these values, but which is not necessarily running against the same version of the code here.

[Reply] I'm wondering if we should just always require tag in serde impls to ensure forward compatibility, in case any particular variant starts having associated data in the future. (The requirement to use a tag, if any variant has associated data, is imposed by the openapi tooling.)

[Reply] I went down the route of "serialize/deserialize via string", as @sunshowers recommended later.