From 510ef5a7849afa01ce28a706e109cbd1cf360370 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 18 Dec 2024 13:35:24 -0800 Subject: [PATCH] [sled-agent] Avoid causing UUID conflicts (#7266) - Avoids overwriting the value of "dataset UUID" when creating datasets from `omicron_zones_ensure`. Instead, don't set any dataset UUID, which lets subsequent calls to `datasets_ensure` set the right value here. Fixes https://github.com/oxidecomputer/omicron/issues/7265 --- sled-agent/src/sled_agent.rs | 21 +++++-- sled-storage/src/manager.rs | 108 ++++++++++++++++++++++++++--------- 2 files changed, 97 insertions(+), 32 deletions(-) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index b9bf703933..37526690cb 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -962,12 +962,21 @@ impl SledAgent { continue; }; - // First, ensure the dataset exists - let dataset_id = zone.id.into_untyped_uuid(); - self.inner - .storage - .upsert_filesystem(dataset_id, dataset_name) - .await?; + // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160 + // + // However, we need to ensure that all blueprints have datasets + // within them before we can remove this back-fill. + // + // Therefore, we do something hairy here: We ensure the filesystem + // exists, but don't specify any dataset UUID value. + // + // This means that: + // - If the dataset exists and has a UUID, this will be a no-op + // - If the dataset doesn't exist, it'll be created without its + // oxide:uuid zfs property set + // - If a subsequent call to "datasets_ensure" tries to set a UUID, + // it should be able to get set (once). + self.inner.storage.upsert_filesystem(None, dataset_name).await?; } self.inner diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index f0653c7905..d6ffd42d0a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -35,7 +35,6 @@ use std::collections::HashSet; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; -use uuid::Uuid; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. @@ -100,7 +99,7 @@ enum StorageManagerState { #[derive(Debug)] pub(crate) struct NewFilesystemRequest { - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, responder: DebugIgnore>>, } @@ -526,7 +525,7 @@ impl StorageHandle { // and ask for the set of all datasets from Nexus. pub async fn upsert_filesystem( &self, - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); @@ -1499,27 +1498,9 @@ impl StorageManager { zoned, encryption_details, size_details, - id: Some(DatasetUuid::from_untyped_uuid(request.dataset_id)), + id: request.dataset_id, additional_options: None, })?; - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { - if let Ok(id) = id_str.parse::() { - if id != request.dataset_id { - return Err(Error::UuidMismatch { - name: request.dataset_name.full_name(), - old: id, - new: request.dataset_id, - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value( - &fs_name, - "uuid", - &request.dataset_id.to_string(), - )?; Ok(()) } @@ -1544,7 +1525,6 @@ mod tests { use std::collections::BTreeMap; use std::str::FromStr; use std::sync::atomic::Ordering; - use uuid::Uuid; // A helper struct to advance time. struct TimeTravel {} @@ -2005,16 +1985,92 @@ mod tests { .expect("Ensuring disks should work after key manager is ready"); assert!(!result.has_error(), "{:?}", result); - // Create a filesystem on the newly formatted U.2 - let dataset_id = Uuid::new_v4(); + // Create a filesystem on the newly formatted U.2. + // + // We can call "upsert_filesystem" both with and without a UUID. + let dataset_id = DatasetUuid::new_v4(); + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let dataset_name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + harness + .handle() + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + // Observe the dataset exists, and the UUID is set. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness + .handle() + .upsert_filesystem(None, dataset_name.clone()) + .await + .unwrap(); + // Observe the dataset still exists, and the UUID is still set, + // even though we did not ask for a new value explicitly. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn upsert_filesystem_no_uuid() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("upsert_filesystem"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. + harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create a filesystem on the newly formatted U.2, without a UUID let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); let dataset_name = DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); harness .handle() - .upsert_filesystem(dataset_id, dataset_name) + .upsert_filesystem(None, dataset_name.clone()) .await .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, None); + + // Later, we can set the UUID to a specific value + let dataset_id = DatasetUuid::new_v4(); + harness + .handle() + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); harness.cleanup().await; logctx.cleanup_successful();