Skip to content

Commit

Permalink
[sled agent] API to manage datasets explicitly (#6144)
Browse files Browse the repository at this point in the history
This PR exposes an API from the Sled Agent which allows Nexus to
configure datasets independently from Zones.

Here's an example subset of `zfs list -o name` output on a deployed system,
with some annotations inline:

```bash
# This is the pool of an arbitrary U.2
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980

# Crucible has a dataset that isn't encrypted at the ZFS layer, because it's encrypted internally...
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crucible
# ... and it contains a lot of region datasets.
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crucible/regions/...

# We have a dataset which uses a trust-quorum-derived encryption key.
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt
# Durable datasets (e.g. Cockroach's) can be stored in here.
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/cockroachdb
# The "debug" dataset has been historically created by + managed by the Sled Agent.
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/debug
# Transient zone filesystems also exist here, and are encrypted.
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3
oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_crucible_a232eba2-e94f-4592-a5a6-ec23f9be3296 
```

## History

Prior to this PR, the sled agent exposed no interfaces to **explicitly**
manage datasets on their own. Datasets could be created in one of two ways:

1. Created and managed by the sled agent, without telling Nexus. See:
the `debug` dataset.
2. Created in response to requests from Nexus to create zones. See:
`crucible`, `cockroachdb`, and the `zone` filesystems above.

These APIs did not provide a significant amount of control over dataset
usage, and provided no mechanism for setting quotas and reservations.

## This PR

- Expands Nexus' notion of "dataset kind" to include the following
variants:
  - `zone_root`, for the `crypt/zone` dataset.
  - `zone`, for any dataset within `crypt/zone` (e.g.,
    `crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3`).
  - `debug`, for the `crypt/debug` dataset.
- Adds two endpoints to Sled Agent: `datasets_put`, and `datasets_get`,
for setting a configuration of expected datasets. At the moment,
`datasets_put` is purely additive, and does not remove any missing
datasets.
- This API provides a mechanism for Nexus to manage quotas and
reservations, which it will do in the future.

This PR is related to
#6167, which provides
additional tooling through the inventory for inspecting dataset state on
deployed sleds.

Fixes #6042,
#6107

---------

Co-authored-by: Rain <[email protected]>
  • Loading branch information
smklein and sunshowers authored Aug 28, 2024
1 parent c3c5f84 commit 648507d
Show file tree
Hide file tree
Showing 43 changed files with 1,706 additions and 300 deletions.
1 change: 1 addition & 0 deletions clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ progenitor::generate_api!(
replace = {
Baseboard = nexus_sled_agent_shared::inventory::Baseboard,
ByteCount = omicron_common::api::external::ByteCount,
DatasetKind = omicron_common::api::internal::shared::DatasetKind,
DiskIdentity = omicron_common::disk::DiskIdentity,
DiskVariant = omicron_common::disk::DiskVariant,
Generation = omicron_common::api::external::Generation,
Expand Down
188 changes: 181 additions & 7 deletions common/src/api/internal/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ use crate::{
};
use oxnet::{IpNet, Ipv4Net, Ipv6Net};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use std::{
collections::{HashMap, HashSet},
fmt,
net::{IpAddr, Ipv4Addr, Ipv6Addr},
str::FromStr,
};
use strum::EnumCount;
use uuid::Uuid;

use super::nexus::HostIdentifier;
Expand Down Expand Up @@ -837,13 +838,11 @@ pub struct ResolvedVpcRouteSet {
}

/// Describes the purpose of the dataset.
#[derive(
Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq,
)]
#[serde(rename_all = "snake_case")]
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, EnumCount)]
pub enum DatasetKind {
Crucible,
// Durable datasets for zones
Cockroach,
Crucible,
/// Used for single-node clickhouse deployments
Clickhouse,
/// Used for replicated clickhouse deployments
Expand All @@ -852,24 +851,153 @@ pub enum DatasetKind {
ClickhouseServer,
ExternalDns,
InternalDns,

// Zone filesystems
ZoneRoot,
Zone {
name: String,
},

// Other datasets
Debug,
}

impl Serialize for DatasetKind {
    /// Serializes the kind as a single string: the text produced by
    /// `to_string()` on this value.
    fn serialize<S: Serializer>(
        &self,
        serializer: S,
    ) -> Result<S::Ok, S::Error> {
        let rendered = self.to_string();
        serializer.serialize_str(rendered.as_str())
    }
}

impl<'de> Deserialize<'de> for DatasetKind {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
s.parse().map_err(de::Error::custom)
}
}

impl JsonSchema for DatasetKind {
    /// Name under which this type appears in generated schemas.
    fn schema_name() -> String {
        String::from("DatasetKind")
    }

    /// Describes the type as a string schema with an explanatory
    /// description attached.
    fn json_schema(
        gen: &mut schemars::gen::SchemaGenerator,
    ) -> schemars::schema::Schema {
        // Strictly speaking the value is either one of the fixed kind names
        // or a string beginning with "zone/". A plain string schema is a
        // deliberate simplification that is good enough for now.
        let mut string_schema = String::json_schema(gen).into_object();
        let description = "The kind of dataset. See the `DatasetKind` enum \
            in omicron-common for possible values.";
        string_schema.metadata().description = Some(description.to_owned());
        string_schema.into()
    }
}

impl DatasetKind {
pub fn dataset_should_be_encrypted(&self) -> bool {
match self {
// We encrypt all datasets except Crucible.
//
// Crucible already performs encryption internally, and we
// avoid double-encryption.
DatasetKind::Crucible => false,
_ => true,
}
}

/// Returns true if this dataset is delegated to a non-global zone.
pub fn zoned(&self) -> bool {
use DatasetKind::*;
match self {
Cockroach | Crucible | Clickhouse | ClickhouseKeeper
| ClickhouseServer | ExternalDns | InternalDns => true,
ZoneRoot | Zone { .. } | Debug => false,
}
}

/// Returns the zone name, if this is a dataset for a zone filesystem.
///
/// Otherwise, returns "None".
pub fn zone_name(&self) -> Option<&str> {
if let DatasetKind::Zone { name } = self {
Some(name)
} else {
None
}
}
}

// Be cautious updating this implementation:
//
// - It should align with [DatasetKind::FromStr], below
// - The strings here comprise the dataset name, which is stored durably
// on-disk
impl fmt::Display for DatasetKind {
    /// Writes the canonical string form of the kind.
    ///
    /// Fixed kinds are written as a fixed lowercase name; a zone filesystem
    /// is written as `zone/<name>`. These strings must stay in step with the
    /// `FromStr` implementation for this type, and they form part of the
    /// dataset name that is stored durably on-disk, so change them with
    /// care.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use DatasetKind::*;
        let s = match self {
            Crucible => "crucible",
            // Exactly one arm per variant: "cockroachdb" is the spelling the
            // parser accepts, so it is the only spelling emitted here.
            Cockroach => "cockroachdb",
            Clickhouse => "clickhouse",
            ClickhouseKeeper => "clickhouse_keeper",
            ClickhouseServer => "clickhouse_server",
            ExternalDns => "external_dns",
            InternalDns => "internal_dns",
            ZoneRoot => "zone",
            // The only variant with dynamic content; write it directly.
            Zone { name } => return write!(f, "zone/{}", name),
            Debug => "debug",
        };
        f.write_str(s)
    }
}

/// Error produced when a string cannot be parsed as a `DatasetKind`.
#[derive(Debug, thiserror::Error)]
pub enum DatasetKindParseError {
/// The input matched no known dataset kind; carries the rejected input.
#[error("Dataset unknown: {0}")]
UnknownDataset(String),
}

impl FromStr for DatasetKind {
type Err = DatasetKindParseError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
use DatasetKind::*;
let kind = match s {
"cockroachdb" => Cockroach,
"crucible" => Crucible,
"clickhouse" => Clickhouse,
"clickhouse_keeper" => ClickhouseKeeper,
"clickhouse_server" => ClickhouseServer,
"external_dns" => ExternalDns,
"internal_dns" => InternalDns,
"zone" => ZoneRoot,
"debug" => Debug,
other => {
if let Some(name) = other.strip_prefix("zone/") {
Zone { name: name.to_string() }
} else {
return Err(DatasetKindParseError::UnknownDataset(
s.to_string(),
));
}
}
};
Ok(kind)
}
}

/// Identifiers for a single sled.
///
/// This is intended primarily to be used in timeseries, to identify
Expand All @@ -892,6 +1020,7 @@ pub struct SledIdentifiers {

#[cfg(test)]
mod tests {
use super::*;
use crate::api::internal::shared::AllowedSourceIps;
use oxnet::{IpNet, Ipv4Net, Ipv6Net};
use std::net::{Ipv4Addr, Ipv6Addr};
Expand Down Expand Up @@ -936,4 +1065,49 @@ mod tests {
serde_json::from_str(r#"{"allow":"any"}"#).unwrap(),
);
}

#[test]
fn test_dataset_kind_serialization() {
    // One sample value per variant. The count assertion below fails when a
    // variant is added to the enum without being added to this list.
    let kinds = [
        DatasetKind::Cockroach,
        DatasetKind::Crucible,
        DatasetKind::Clickhouse,
        DatasetKind::ClickhouseKeeper,
        DatasetKind::ClickhouseServer,
        DatasetKind::ExternalDns,
        DatasetKind::InternalDns,
        DatasetKind::ZoneRoot,
        DatasetKind::Zone { name: String::from("myzone") },
        DatasetKind::Debug,
    ];

    assert_eq!(kinds.len(), DatasetKind::COUNT);

    for kind in kinds.iter() {
        // Round trip through the string representation.
        let as_str = kind.to_string();
        let parsed = as_str.parse::<DatasetKind>().unwrap_or_else(|_| {
            panic!("Failed to convert {kind} to and from string")
        });
        assert_eq!(
            *kind, parsed,
            "{kind} failed to convert to/from a string"
        );

        // Round trip through JSON.
        let json = serde_json::to_string(kind)
            .unwrap_or_else(|_| panic!("Failed to serialize {kind}"));
        let decoded: DatasetKind = serde_json::from_str(&json)
            .unwrap_or_else(|_| panic!("Failed to deserialize {kind}"));
        assert_eq!(*kind, decoded, "{kind} failed serialization");

        // The JSON encoding must be exactly the quoted string form.
        assert_eq!(
            format!("\"{as_str}\""),
            json,
            "{kind} does not match stringification/serialization"
        );
    }
}
}
Loading

0 comments on commit 648507d

Please sign in to comment.