From ab57c4671e5a0ffdf7f050eb29a7ba3afa4c5fcc Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Thu, 28 Sep 2023 22:51:06 +0000
Subject: [PATCH] wip

---
 common/src/disk.rs                          |   2 +-
 sled-storage/src/error.rs                   |   3 +-
 sled-storage/src/lib.rs                     |   3 +-
 sled-storage/src/manager.rs                 | 110 ++++++++++++++++++++
 sled-storage/src/pool.rs                    |  10 +-
 sled-storage/src/{state.rs => resources.rs} |  30 ++++--
 6 files changed, 141 insertions(+), 17 deletions(-)
 create mode 100644 sled-storage/src/manager.rs
 rename sled-storage/src/{state.rs => resources.rs} (81%)

diff --git a/common/src/disk.rs b/common/src/disk.rs
index 3ea8091326..3ae9c31e01 100644
--- a/common/src/disk.rs
+++ b/common/src/disk.rs
@@ -5,7 +5,7 @@
 //! Disk related types shared among crates
 
 /// Uniquely identifies a disk.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
 pub struct DiskIdentity {
     pub vendor: String,
     pub serial: String,
diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs
index 04c4f7ec07..fbf721fab7 100644
--- a/sled-storage/src/error.rs
+++ b/sled-storage/src/error.rs
@@ -5,6 +5,7 @@
 //! Storage related errors
 
 use crate::dataset::DatasetName;
+use crate::disk::DiskError;
 use camino::Utf8PathBuf;
 use omicron_common::api::external::ByteCountRangeError;
 use uuid::Uuid;
@@ -12,7 +13,7 @@
 #[derive(thiserror::Error, Debug)]
 pub enum Error {
     #[error(transparent)]
-    DiskError(#[from] sled_hardware::PooledDiskError),
+    DiskError(#[from] DiskError),
 
     // TODO: We could add the context of "why are we doint this op", maybe?
     #[error(transparent)]
diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs
index 783eaf6642..f923165896 100644
--- a/sled-storage/src/lib.rs
+++ b/sled-storage/src/lib.rs
@@ -13,5 +13,6 @@ pub(crate) mod disk;
 pub(crate) mod dump_setup;
 pub mod error;
 pub(crate) mod keyfile;
+pub mod manager;
 pub(crate) mod pool;
-pub mod state;
+pub mod resources;
diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs
new file mode 100644
index 0000000000..dbbe5fb57a
--- /dev/null
+++ b/sled-storage/src/manager.rs
@@ -0,0 +1,110 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! The storage manager task
+
+use std::collections::{BTreeSet, HashSet};
+
+use crate::dataset::DatasetError;
+use crate::disk::{Disk, DiskError, DiskWrapper};
+use crate::error::Error;
+use crate::resources::StorageResources;
+use derive_more::From;
+use illumos_utils::zpool::{ZpoolKind, ZpoolName};
+use key_manager::StorageKeyRequester;
+use omicron_common::disk::DiskIdentity;
+use sled_hardware::{DiskVariant, UnparsedDisk};
+use slog::{error, info, o, warn, Logger};
+use tokio::sync::{mpsc, oneshot};
+
+// The size of the mpsc bounded channel used to communicate
+// between the `StorageHandle` and `StorageManager`.
+const QUEUE_SIZE: usize = 256;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum StorageManagerStage {
+    WaitingForBootDisk,
+    WaitingForKeyManager,
+    QueuingDisks,
+    Normal,
+}
+
+enum StorageRequest {}
+
+/// A mechanism for interacting with the [`StorageManager`]
+pub struct StorageHandle {
+    tx: mpsc::Sender<StorageRequest>,
+}
+
+/// The storage manager responsible for the state of the storage
+/// on a sled. The storage manager runs in its own task and is interacted
+/// with via the [`StorageHandle`].
+pub struct StorageManager {
+    log: Logger,
+    stage: StorageManagerStage,
+    rx: mpsc::Receiver<StorageRequest>,
+    resources: StorageResources,
+    queued_u2_drives: HashSet<UnparsedDisk>,
+    queued_synthetic_u2_drives: BTreeSet<ZpoolName>,
+    key_requester: StorageKeyRequester,
+}
+
+impl StorageManager {
+    pub fn new(
+        log: &Logger,
+        key_requester: StorageKeyRequester,
+    ) -> (StorageManager, StorageHandle) {
+        let (tx, rx) = mpsc::channel(QUEUE_SIZE);
+        (
+            StorageManager {
+                log: log.new(o!("component" => "StorageManager")),
+                stage: StorageManagerStage::WaitingForBootDisk,
+                rx,
+                resources: StorageResources::default(),
+                queued_u2_drives: HashSet::new(),
+                queued_synthetic_u2_drives: BTreeSet::new(),
+                key_requester,
+            },
+            StorageHandle { tx },
+        )
+    }
+
+    /// Add a disk to storage resources or queue it to be added later
+    async fn add_u2_disk(
+        &mut self,
+        unparsed_disk: UnparsedDisk,
+    ) -> Result<(), Error> {
+        if self.stage != StorageManagerStage::Normal {
+            self.queued_u2_drives.insert(unparsed_disk);
+            return Ok(());
+        }
+
+        match Disk::new(
+            &self.log,
+            unparsed_disk.clone(),
+            Some(&self.key_requester),
+        )
+        .await
+        {
+            Ok(disk) => self.resources.insert_real_disk(disk),
+            Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => {
+                warn!(
+                    self.log,
+                    "Transient error: {err} - queuing disk {:?}", unparsed_disk
+                );
+                self.queued_u2_drives.insert(unparsed_disk);
+                self.stage = StorageManagerStage::QueuingDisks;
+                Err(err.into())
+            }
+            Err(err) => {
+                error!(
+                    self.log,
+                    "Persistent error: {err} - not queueing disk {:?}",
+                    unparsed_disk
+                );
+                Err(err.into())
+            }
+        }
+    }
+}
diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs
index 1abf43c1de..a16722537d 100644
--- a/sled-storage/src/pool.rs
+++ b/sled-storage/src/pool.rs
@@ -16,9 +16,9 @@ use illumos_utils::zpool::Zpool;
 /// A ZFS storage pool
 #[derive(Debug, Clone)]
 pub struct Pool {
-    name: ZpoolName,
-    info: ZpoolInfo,
-    parent: DiskIdentity,
+    pub name: ZpoolName,
+    pub info: ZpoolInfo,
+    pub parent: DiskIdentity,
 }
 
 impl Pool {
@@ -29,8 +29,4 @@ impl Pool {
         let info = Zpool::get_info(&name.to_string())?;
         Ok(Pool { name, info, parent })
     }
-
-    pub fn parent(&self) -> &DiskIdentity {
-        &self.parent
-    }
 }
diff --git a/sled-storage/src/state.rs b/sled-storage/src/resources.rs
similarity index 81%
rename from sled-storage/src/state.rs
rename to sled-storage/src/resources.rs
index 8a0be34f63..0e874be522 100644
--- a/sled-storage/src/state.rs
+++ b/sled-storage/src/resources.rs
@@ -2,13 +2,15 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
-//! The internal state of the storage manager task
+//! Discovered and usable disks and zpools
 
 use crate::dataset::M2_DEBUG_DATASET;
-use crate::disk::DiskWrapper;
+use crate::disk::{Disk, DiskWrapper};
+use crate::error::Error;
 use crate::pool::Pool;
 use camino::Utf8PathBuf;
 use illumos_utils::zpool::ZpoolName;
+use omicron_common::api::external::{ByteCount, ByteCountRangeError};
 use omicron_common::disk::DiskIdentity;
 use sled_hardware::DiskVariant;
 use std::collections::BTreeMap;
@@ -21,7 +23,7 @@ const BUNDLE_DIRECTORY: &str = "bundle";
 // The directory for zone bundles.
 const ZONE_BUNDLE_DIRECTORY: &str = "zone";
 
-/// Storage related state
+/// Storage related resources: disks and zpools
 ///
 /// This state is internal to the [`crate::StorageManager`] task. Clones
 /// of this state, or subsets of it, can be retrieved by requests to the
@@ -34,10 +36,10 @@
 /// inside the `StorageManager` task if there are any outstanding copies.
 /// Therefore, we only pay the cost to update infrequently, and no locks are
 /// required by callers when operating on cloned data. The only contention here
-/// is for the refrence counters of the internal Arcs when `State` gets cloned
+/// is for the reference counters of the internal Arcs when `StorageResources` gets cloned
 /// or dropped.
-#[derive(Debug, Clone)]
-pub struct State {
+#[derive(Debug, Clone, Default)]
+pub struct StorageResources {
     // All disks, real and synthetic, being managed by this sled
     disks: Arc<BTreeMap<DiskIdentity, DiskWrapper>>,
 
@@ -45,7 +47,21 @@
     pools: Arc<BTreeMap<Uuid, Pool>>,
 }
 
-impl State {
+impl StorageResources {
+    /// Insert a disk and its zpool
+    pub(crate) fn insert_real_disk(&mut self, disk: Disk) -> Result<(), Error> {
+        let parent = disk.identity().clone();
+        let zpool_name = disk.zpool_name().clone();
+        let disk = DiskWrapper::Real {
+            disk: disk.clone(),
+            devfs_path: disk.devfs_path().clone(),
+        };
+        Arc::make_mut(&mut self.disks).insert(disk.identity(), disk);
+        let zpool = Pool::new(zpool_name, parent)?;
+        Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool);
+        Ok(())
+    }
+
     /// Returns the identity of the boot disk.
     ///
     /// If this returns `None`, we have not processed the boot disk yet.