[sled-agent] Avoid causing UUID conflicts (#7266)
- Avoids overwriting the value of "dataset UUID" when creating datasets
from `omicron_zones_ensure`. Instead, don't set any dataset UUID, which
lets subsequent calls to `datasets_ensure` set the right value here.

Fixes #7265
smklein authored Dec 18, 2024
1 parent 7297d76 commit 510ef5a
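
In short, `StorageHandle::upsert_filesystem` now takes an `Option<DatasetUuid>` instead of a bare `Uuid`; the zone back-fill path passes `None`, leaving the `oxide:uuid` ZFS property unset until a later `datasets_ensure` call supplies the real value. The sketch below is a toy model, not Omicron code: `FakeStorage`, its field names, and the `u128` stand-in UUID are invented here purely to illustrate the "set the UUID at most once, never overwrite it" behavior that the new tests exercise.

// Toy model (illustration only): mirrors the behavior described in the
// commit message and the new tests in sled-storage/src/manager.rs.
use std::collections::HashMap;

type DatasetUuid = u128; // stand-in for the real DatasetUuid type

#[derive(Default)]
struct FakeStorage {
    // dataset name -> optional "oxide:uuid" property
    datasets: HashMap<String, Option<DatasetUuid>>,
}

impl FakeStorage {
    fn upsert_filesystem(&mut self, id: Option<DatasetUuid>, name: &str) {
        let entry = self.datasets.entry(name.to_string()).or_insert(None);
        // Only adopt a UUID if the dataset does not have one yet; an
        // existing UUID is never overwritten. (Conflicting IDs are not
        // modeled here.)
        if entry.is_none() {
            *entry = id;
        }
    }
}

fn main() {
    let mut storage = FakeStorage::default();
    // Zone back-fill: create the dataset without choosing a UUID.
    storage.upsert_filesystem(None, "oxp_example/crucible");
    // A later datasets_ensure-style call supplies the UUID (set once).
    storage.upsert_filesystem(Some(42), "oxp_example/crucible");
    // Further UUID-less calls are no-ops and keep the existing value.
    storage.upsert_filesystem(None, "oxp_example/crucible");
    assert_eq!(storage.datasets["oxp_example/crucible"], Some(42));
    println!("dataset keeps its UUID: {:?}", storage.datasets);
}
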
Showing 2 changed files with 97 additions and 32 deletions.
21 changes: 15 additions & 6 deletions sled-agent/src/sled_agent.rs
@@ -962,12 +962,21 @@ impl SledAgent {
                 continue;
             };

-            // First, ensure the dataset exists
-            let dataset_id = zone.id.into_untyped_uuid();
-            self.inner
-                .storage
-                .upsert_filesystem(dataset_id, dataset_name)
-                .await?;
+            // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160
+            //
+            // However, we need to ensure that all blueprints have datasets
+            // within them before we can remove this back-fill.
+            //
+            // Therefore, we do something hairy here: We ensure the filesystem
+            // exists, but don't specify any dataset UUID value.
+            //
+            // This means that:
+            // - If the dataset exists and has a UUID, this will be a no-op
+            // - If the dataset doesn't exist, it'll be created without its
+            //   oxide:uuid zfs property set
+            // - If a subsequent call to "datasets_ensure" tries to set a UUID,
+            //   it should be able to get set (once).
+            self.inner.storage.upsert_filesystem(None, dataset_name).await?;
         }

         self.inner
108 changes: 82 additions & 26 deletions sled-storage/src/manager.rs
@@ -35,7 +35,6 @@ use std::collections::HashSet;
 use std::future::Future;
 use tokio::sync::{mpsc, oneshot, watch};
 use tokio::time::{interval, Duration, MissedTickBehavior};
-use uuid::Uuid;

 // The size of the mpsc bounded channel used to communicate
 // between the `StorageHandle` and `StorageManager`.
@@ -100,7 +99,7 @@ enum StorageManagerState {

 #[derive(Debug)]
 pub(crate) struct NewFilesystemRequest {
-    dataset_id: Uuid,
+    dataset_id: Option<DatasetUuid>,
     dataset_name: DatasetName,
     responder: DebugIgnore<oneshot::Sender<Result<(), Error>>>,
 }
@@ -526,7 +525,7 @@ impl StorageHandle {
     // and ask for the set of all datasets from Nexus.
     pub async fn upsert_filesystem(
         &self,
-        dataset_id: Uuid,
+        dataset_id: Option<DatasetUuid>,
         dataset_name: DatasetName,
     ) -> Result<(), Error> {
         let (tx, rx) = oneshot::channel();
@@ -1499,27 +1498,9 @@ impl StorageManager {
             zoned,
             encryption_details,
             size_details,
-            id: Some(DatasetUuid::from_untyped_uuid(request.dataset_id)),
+            id: request.dataset_id,
             additional_options: None,
         })?;
-        // Ensure the dataset has a usable UUID.
-        if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") {
-            if let Ok(id) = id_str.parse::<Uuid>() {
-                if id != request.dataset_id {
-                    return Err(Error::UuidMismatch {
-                        name: request.dataset_name.full_name(),
-                        old: id,
-                        new: request.dataset_id,
-                    });
-                }
-                return Ok(());
-            }
-        }
-        Zfs::set_oxide_value(
-            &fs_name,
-            "uuid",
-            &request.dataset_id.to_string(),
-        )?;

         Ok(())
     }
@@ -1544,7 +1525,6 @@ mod tests {
     use std::collections::BTreeMap;
     use std::str::FromStr;
     use std::sync::atomic::Ordering;
-    use uuid::Uuid;

     // A helper struct to advance time.
     struct TimeTravel {}
@@ -2005,16 +1985,92 @@
             .expect("Ensuring disks should work after key manager is ready");
         assert!(!result.has_error(), "{:?}", result);

-        // Create a filesystem on the newly formatted U.2
-        let dataset_id = Uuid::new_v4();
+        // Create a filesystem on the newly formatted U.2.
+        //
+        // We can call "upsert_filesystem" both with and without a UUID.
+        let dataset_id = DatasetUuid::new_v4();
         let zpool_name = ZpoolName::new_external(config.disks[0].pool_id);
         let dataset_name =
             DatasetName::new(zpool_name.clone(), DatasetKind::Crucible);
+        harness
+            .handle()
+            .upsert_filesystem(Some(dataset_id), dataset_name.clone())
+            .await
+            .unwrap();
+        // Observe the dataset exists, and the UUID is set.
+        let observed_dataset = &Zfs::get_dataset_properties(
+            &[dataset_name.full_name()],
+            WhichDatasets::SelfOnly,
+        )
+        .unwrap()[0];
+        assert_eq!(observed_dataset.id, Some(dataset_id));
+
+        harness
+            .handle()
+            .upsert_filesystem(None, dataset_name.clone())
+            .await
+            .unwrap();
+        // Observe the dataset still exists, and the UUID is still set,
+        // even though we did not ask for a new value explicitly.
+        let observed_dataset = &Zfs::get_dataset_properties(
+            &[dataset_name.full_name()],
+            WhichDatasets::SelfOnly,
+        )
+        .unwrap()[0];
+        assert_eq!(observed_dataset.id, Some(dataset_id));
+
+        harness.cleanup().await;
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn upsert_filesystem_no_uuid() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("upsert_filesystem");
+        let mut harness = StorageManagerTestHarness::new(&logctx.log).await;
+
+        // Test setup: Add a U.2 and M.2, adopt them into the "control plane"
+        // for usage.
+        harness.handle().key_manager_ready().await;
+        let raw_disks =
+            harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await;
+        let config = harness.make_config(1, &raw_disks);
+        let result = harness
+            .handle()
+            .omicron_physical_disks_ensure(config.clone())
+            .await
+            .expect("Ensuring disks should work after key manager is ready");
+        assert!(!result.has_error(), "{:?}", result);
+
+        // Create a filesystem on the newly formatted U.2, without a UUID
+        let zpool_name = ZpoolName::new_external(config.disks[0].pool_id);
+        let dataset_name =
+            DatasetName::new(zpool_name.clone(), DatasetKind::Crucible);
         harness
             .handle()
-            .upsert_filesystem(dataset_id, dataset_name)
+            .upsert_filesystem(None, dataset_name.clone())
             .await
             .unwrap();
+        let observed_dataset = &Zfs::get_dataset_properties(
+            &[dataset_name.full_name()],
+            WhichDatasets::SelfOnly,
+        )
+        .unwrap()[0];
+        assert_eq!(observed_dataset.id, None);
+
+        // Later, we can set the UUID to a specific value
+        let dataset_id = DatasetUuid::new_v4();
+        harness
+            .handle()
+            .upsert_filesystem(Some(dataset_id), dataset_name.clone())
+            .await
+            .unwrap();
+        let observed_dataset = &Zfs::get_dataset_properties(
+            &[dataset_name.full_name()],
+            WhichDatasets::SelfOnly,
+        )
+        .unwrap()[0];
+        assert_eq!(observed_dataset.id, Some(dataset_id));

         harness.cleanup().await;
         logctx.cleanup_successful();
