From c356cc6a1b3e674e3e8fb382e414deff19f27f3a Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Mon, 6 May 2024 15:10:51 +0000 Subject: [PATCH] WIP expose disk firmware in inventory --- Cargo.lock | 21 ++++- Cargo.toml | 2 +- installinator/src/hardware.rs | 2 +- sled-agent/src/hardware_monitor.rs | 14 ++- sled-hardware/src/disk.rs | 49 ++++++++++ sled-hardware/src/illumos/mod.rs | 109 +++++++++++++++++++---- sled-hardware/src/lib.rs | 1 + sled-storage/src/disk.rs | 45 +++++++++- sled-storage/src/manager.rs | 107 +++++++++++++++++++++- sled-storage/src/manager_test_harness.rs | 9 ++ sled-storage/src/resources.rs | 37 ++++++-- 11 files changed, 365 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8eceb218c09..57165f13291 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -542,6 +542,17 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d7e60934ceec538daadb9d8432424ed043a904d8e0243f3c6446bce549a46ac" +[[package]] +name = "bitfield-struct" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657dce144574f921af10a92876a96f0ca05dd830900598d21d91c8e4cf78f74" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -4063,8 +4074,8 @@ dependencies = [ [[package]] name = "libnvme" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/libnvme?rev=6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe#6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" +version = "0.1.1" +source = "git+https://github.com/oxidecomputer/libnvme?rev=0eed4e4929fc2311326646e818e065823ac9a695#0eed4e4929fc2311326646e818e065823ac9a695" dependencies = [ "libnvme-sys", "thiserror", @@ -4073,7 +4084,11 @@ dependencies = [ [[package]] name = "libnvme-sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/libnvme?rev=6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe#6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" +source = "git+https://github.com/oxidecomputer/libnvme?rev=0eed4e4929fc2311326646e818e065823ac9a695#0eed4e4929fc2311326646e818e065823ac9a695" +dependencies = [ + "bitfield-struct", + "static_assertions", +] [[package]] name = "libsw" diff --git a/Cargo.toml b/Cargo.toml index 9edad4dbbd8..4e6697a76ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -263,7 +263,7 @@ key-manager = { path = "key-manager" } kstat-rs = "0.2.3" libc = "0.2.153" libfalcon = { git = "https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" } -libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" } +libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "0eed4e4929fc2311326646e818e065823ac9a695" } linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index a48d816dc83..a4c2af4927b 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -31,7 +31,7 @@ impl Hardware { })?; let disks: Vec = - hardware.disks().into_iter().map(|disk| disk.into()).collect(); + hardware.disks().into_iter().map(|(_, disk)| disk.into()).collect(); info!( log, "found gimlet hardware"; diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 6dbca89d74c..fa3a1c3e0e1 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -199,6 +199,15 @@ impl HardwareMonitor { .detected_raw_disk_removal(disk.into()) .await; } + HardwareUpdate::DiskUpdated(disk) => { + // We notify the storage manager of the hardware, but do not need to + // wait for the result to be fully processed. + #[allow(clippy::let_underscore_future)] + let _ = self + .storage_manager + .detected_raw_disk_update(disk.into()) + .await; + } }, Err(broadcast::error::RecvError::Lagged(count)) => { warn!(self.log, "Hardware monitor missed {count} messages"); @@ -277,7 +286,10 @@ impl HardwareMonitor { let _ = self .storage_manager .ensure_using_exactly_these_disks( - self.hardware_manager.disks().into_iter().map(RawDisk::from), + self.hardware_manager + .disks() + .into_iter() + .map(|(_, disk)| RawDisk::from(disk)), ) .await; } diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index d48dd88c3dc..13f35259975 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -132,6 +132,46 @@ impl DiskPaths { } } +// XXX MTZ: Make this an enum with DiskFirmware::Nvme? +#[derive( + Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, Deserialize, Serialize, +)] +pub struct DiskFirmware { + active_slot: u8, + next_active_slot: Option, + slot1_read_only: bool, + slots: Vec>, +} + +impl DiskFirmware { + pub fn active_slot(&self) -> u8 { + self.active_slot + } + + pub fn next_active_slot(&self) -> Option { + self.next_active_slot + } + + pub fn slot1_read_only(&self) -> bool { + self.slot1_read_only + } + + pub fn slots(&self) -> &[Option] { + self.slots.as_slice() + } +} + +impl DiskFirmware { + pub fn new( + active_slot: u8, + next_active_slot: Option, + slot1_read_only: bool, + slots: Vec>, + ) -> Self { + Self { active_slot, next_active_slot, slot1_read_only, slots } + } +} + /// A disk which has been observed by monitoring hardware. /// /// No guarantees are made about the partitions which exist within this disk. @@ -147,6 +187,7 @@ pub struct UnparsedDisk { variant: DiskVariant, identity: DiskIdentity, is_boot_disk: bool, + firmware: DiskFirmware, } impl UnparsedDisk { @@ -157,6 +198,7 @@ impl UnparsedDisk { variant: DiskVariant, identity: DiskIdentity, is_boot_disk: bool, + firmware: DiskFirmware, ) -> Self { Self { paths: DiskPaths { devfs_path, dev_path }, @@ -164,6 +206,7 @@ impl UnparsedDisk { variant, identity, is_boot_disk, + firmware, } } @@ -190,6 +233,10 @@ impl UnparsedDisk { pub fn slot(&self) -> i64 { self.slot } + + pub fn firmware(&self) -> &DiskFirmware { + &self.firmware + } } /// A physical disk that is partitioned to contain exactly one zpool @@ -212,6 +259,7 @@ pub struct PooledDisk { // This embeds the assumtion that there is exactly one parsed zpool per // disk. pub zpool_name: ZpoolName, + pub firmware: DiskFirmware, } impl PooledDisk { @@ -252,6 +300,7 @@ impl PooledDisk { is_boot_disk: unparsed_disk.is_boot_disk, partitions, zpool_name, + firmware: unparsed_disk.firmware, }) } } diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 0bf2fa6e533..2bccb1b7420 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -2,12 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::DiskFirmware; use crate::{ DendriteAsic, DiskVariant, HardwareUpdate, SledMode, UnparsedDisk, }; use camino::Utf8PathBuf; use gethostname::gethostname; use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property}; +use libnvme::{controller::Controller, Nvme}; use omicron_common::disk::DiskIdentity; use sled_hardware_types::Baseboard; use slog::debug; @@ -56,6 +58,18 @@ enum Error { #[error("Failed to issue request to sysconf: {0}")] SysconfError(#[from] sysconf::Error), + + #[error("Node {node} missing device insance")] + MissingNvmeDevinfoInstance { node: String }, + + #[error("Failed to init nvme handle: {0}")] + NvmeHandleInit(#[from] libnvme::NvmeInitError), + + #[error("libnvme error: {0}")] + NvmeError(#[from] libnvme::NvmeError), + + #[error("Unable to grab NVMe Controller lock")] + NvmeControllerLocked, } const GIMLET_ROOT_NODE_NAME: &str = "Oxide,Gimlet"; @@ -105,7 +119,7 @@ impl TryFrom for BootStorageUnit { // A snapshot of information about the underlying hardware struct HardwareSnapshot { tofino: TofinoSnapshot, - disks: HashSet, + disks: HashMap, baseboard: Baseboard, } @@ -151,7 +165,7 @@ impl HardwareSnapshot { let tofino = get_tofino_snapshot(log, &mut device_info); // Monitor for block devices. - let mut disks = HashSet::new(); + let mut disks = HashMap::new(); let mut node_walker = device_info.walk_driver("blkdev"); while let Some(node) = node_walker.next().transpose().map_err(Error::DevInfo)? @@ -184,7 +198,7 @@ enum TofinoView { // which services are currently executing. struct HardwareView { tofino: TofinoView, - disks: HashSet, + disks: HashMap, baseboard: Option, online_processor_count: u32, usable_physical_ram_bytes: u64, @@ -199,7 +213,7 @@ impl HardwareView { fn new() -> Result { Ok(Self { tofino: TofinoView::Real(TofinoSnapshot::new()), - disks: HashSet::new(), + disks: HashMap::new(), baseboard: None, online_processor_count: sysconf::online_processor_count()?, usable_physical_ram_bytes: sysconf::usable_physical_ram_bytes()?, @@ -209,7 +223,7 @@ impl HardwareView { fn new_stub_tofino(active: bool) -> Result { Ok(Self { tofino: TofinoView::Stub { active }, - disks: HashSet::new(), + disks: HashMap::new(), baseboard: None, online_processor_count: sysconf::online_processor_count()?, usable_physical_ram_bytes: sysconf::usable_physical_ram_bytes()?, @@ -250,17 +264,38 @@ impl HardwareView { polled_hw: &HardwareSnapshot, updates: &mut Vec, ) { - // In old set, not in new set. - let removed = self.disks.difference(&polled_hw.disks); - // In new set, not in old set. - let added = polled_hw.disks.difference(&self.disks); + let mut added = Vec::new(); + let mut removed = Vec::new(); + let mut updated = Vec::new(); + + // Find new or udpated disks. + for (key, value) in &polled_hw.disks { + match self.disks.get(&key) { + Some(found) => { + if value != found { + updated.push(value.clone()); + } + } + None => added.push(value.clone()), + } + } + + // Find disks which have been removed. + for (key, value) in &self.disks { + if !polled_hw.disks.contains_key(key) { + removed.push(value.clone()); + } + } use HardwareUpdate::*; for disk in removed { - updates.push(DiskRemoved(disk.clone())); + updates.push(DiskRemoved(disk)); } for disk in added { - updates.push(DiskAdded(disk.clone())); + updates.push(DiskAdded(disk)); + } + for disk in updated { + updates.push(DiskUpdated(disk)); } self.disks = polled_hw.disks.clone(); @@ -424,7 +459,7 @@ fn find_properties<'a, const N: usize>( fn poll_blkdev_node( log: &Logger, - disks: &mut HashSet, + disks: &mut HashMap, node: Node<'_>, boot_storage_unit: BootStorageUnit, ) -> Result<(), Error> { @@ -459,6 +494,13 @@ fn poll_blkdev_node( // We expect that the parent of the "blkdev" node is an "nvme" driver. let nvme_node = get_parent_node(&node, "nvme")?; + // Importantly we grab the NVMe instance and not the blkdev instance. + // Eventually we should switch the logic here to search for nvme instances + // and confirm that we only have one blkdev sibling: + // https://github.com/oxidecomputer/omicron/issues/5241 + let nvme_instance = nvme_node + .instance() + .ok_or(Error::MissingNvmeDevinfoInstance { node: node.node_name() })?; let vendor_id = i64_from_property(&find_properties(&nvme_node, ["vendor-id"])?[0])?; @@ -492,15 +534,45 @@ fn poll_blkdev_node( return Err(Error::UnrecognizedSlot { slot }); }; + let nvme = Nvme::new()?; + let controller = Controller::init_by_instance(&nvme, nvme_instance)?; + let controller_lock = match controller.try_read_lock() { + libnvme::controller::TryLockResult::Ok(locked) => locked, + // We should only hit this if something in the system has locked the + // controller in question for writing. + libnvme::controller::TryLockResult::Locked(_) => { + warn!( + log, + "NVMe Controller is already locked so we will try again + in the next hardware snapshot" + ); + return Err(Error::NvmeControllerLocked); + } + libnvme::controller::TryLockResult::Err(err) => { + return Err(Error::from(err)) + } + }; + let firmware_log_page = controller_lock.get_firmware_log_page()?; + let firmware = DiskFirmware::new( + firmware_log_page.active_slot, + firmware_log_page.next_active_slot, + firmware_log_page.slot1_is_read_only, + firmware_log_page + .slot_iter() + .map(|s| s.map(String::to_string)) + .collect(), + ); + let disk = UnparsedDisk::new( Utf8PathBuf::from(&devfs_path), dev_path, slot, variant, - device_id, + device_id.clone(), slot_is_boot_disk(slot, boot_storage_unit), + firmware.clone(), ); - disks.insert(disk); + disks.insert(device_id, disk); Ok(()) } @@ -546,8 +618,11 @@ fn poll_device_tree( // UnparsedDisks. Add those to the HardwareSnapshot here if they // are missing (which they will be for non-gimlets). for observed_disk in nongimlet_observed_disks { - if !inner.disks.contains(observed_disk) { - inner.disks.insert(observed_disk.clone()); + let identity = observed_disk.identity(); + if !inner.disks.contains_key(identity) { + inner + .disks + .insert(identity.clone(), observed_disk.clone()); } } } @@ -707,7 +782,7 @@ impl HardwareManager { self.inner.lock().unwrap().usable_physical_ram_bytes } - pub fn disks(&self) -> HashSet { + pub fn disks(&self) -> HashMap { self.inner.lock().unwrap().disks.clone() } diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 607f72e25c6..d210fbb1ce9 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -34,6 +34,7 @@ pub enum HardwareUpdate { TofinoUnloaded, DiskAdded(UnparsedDisk), DiskRemoved(UnparsedDisk), + DiskUpdated(UnparsedDisk), } // The type of networking 'ASIC' the Dendrite service is expected to manage diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index c9e848559eb..333e32353a2 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -16,7 +16,8 @@ use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::{ - DiskVariant, Partition, PooledDisk, PooledDiskError, UnparsedDisk, + DiskFirmware, DiskVariant, Partition, PooledDisk, PooledDiskError, + UnparsedDisk, }; use slog::{info, Logger}; use uuid::Uuid; @@ -94,6 +95,11 @@ pub struct SyntheticDisk { // system. const SYNTHETIC_SLOT_OFFSET: i64 = 1024; +// A generic name for the firmware in slot1 of an NVMe device. +// +// bhyve for example uses "1.0" and marks slot1 as read-only. +const SYNTHETIC_FIRMWARE_SLOT1: &str = "synthetic 1.0"; + impl SyntheticDisk { // "Manages" a SyntheticDisk by ensuring that it has a Zpool and importing // it. If the zpool already exists, it is imported, but not re-created. @@ -142,6 +148,7 @@ pub struct RawSyntheticDisk { pub identity: DiskIdentity, pub variant: DiskVariant, pub slot: i64, + pub firmware: DiskFirmware, } impl RawSyntheticDisk { @@ -186,11 +193,19 @@ impl RawSyntheticDisk { model: format!("synthetic-model-{variant:?}"), }; + let firmware = DiskFirmware::new( + 1, + None, + true, + vec![Some(SYNTHETIC_FIRMWARE_SLOT1.to_string())], + ); + Ok(Self { path: path.into(), identity, variant, slot: slot + SYNTHETIC_SLOT_OFFSET, + firmware, }) } } @@ -269,6 +284,13 @@ impl RawDisk { Self::Synthetic(disk) => disk.slot, } } + + pub fn firmware(&self) -> &DiskFirmware { + match self { + RawDisk::Real(unparsed) => unparsed.firmware(), + RawDisk::Synthetic(synthetic) => &synthetic.firmware, + } + } } /// A physical [`PooledDisk`] or a [`SyntheticDisk`] that contains or is backed @@ -404,6 +426,26 @@ impl Disk { Self::Synthetic(disk) => disk.raw.slot, } } + + // Today the only update we expect to be able to apply to + // a `Disk` is firmware. + pub(crate) fn update_disk(&mut self, raw_disk: &RawDisk) { + match self { + Disk::Real(pooled_disk) => { + pooled_disk.firmware = raw_disk.firmware().clone(); + } + Disk::Synthetic(synthetic_disk) => { + synthetic_disk.raw.firmware = raw_disk.firmware().clone(); + } + } + } + + pub fn firmware(&self) -> &DiskFirmware { + match self { + Disk::Real(disk) => &disk.firmware, + Disk::Synthetic(disk) => &disk.raw.firmware, + } + } } impl From for RawDisk { @@ -416,6 +458,7 @@ impl From for RawDisk { pooled_disk.variant, pooled_disk.identity, pooled_disk.is_boot_disk, + pooled_disk.firmware, )), Disk::Synthetic(synthetic_disk) => { RawDisk::Synthetic(synthetic_disk.raw) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 4f45f1771e6..28debe4495a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -101,6 +101,10 @@ pub(crate) enum StorageRequest { raw_disk: RawDisk, tx: DebugIgnore>>, }, + DetectedRawDiskUpdate { + raw_disk: RawDisk, + tx: DebugIgnore>>, + }, DetectedRawDiskRemoval { raw_disk: RawDisk, tx: DebugIgnore>>, @@ -187,6 +191,27 @@ impl StorageHandle { rx.map(|result| result.unwrap()) } + /// Updates a disk, if it's tracked by the storage manager, as well + /// as any associated zpools. + /// + /// Returns a future which completes once the notification has been + /// processed. Awaiting this future is optional. + pub async fn detected_raw_disk_update( + &self, + raw_disk: RawDisk, + ) -> impl Future> { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DetectedRawDiskUpdate { + raw_disk, + tx: tx.into(), + }) + .await + .unwrap(); + + rx.map(|result| result.unwrap()) + } + /// Ensures that the storage manager tracks exactly the provided disks. /// /// This acts similar to a batch [Self::detected_raw_disk] for all new disks, and @@ -388,6 +413,13 @@ impl StorageManager { } let _ = tx.0.send(result); } + StorageRequest::DetectedRawDiskUpdate { raw_disk, tx } => { + let result = self.detected_raw_disk_update(raw_disk).await; + if let Err(ref err) = &result { + warn!(self.log, "Failed to apply raw disk update"; "err" => ?err); + } + let _ = tx.0.send(result); + } StorageRequest::DetectedRawDiskRemoval { raw_disk, tx } => { self.detected_raw_disk_removal(raw_disk); let _ = tx.0.send(Ok(())); @@ -475,7 +507,7 @@ impl StorageManager { // coordination with the control plane at large. let needs_synchronization = matches!(raw_disk.variant(), DiskVariant::U2); - self.resources.insert_disk(raw_disk).await?; + self.resources.insert_or_update_disk(raw_disk).await?; if needs_synchronization { match self.state { @@ -501,6 +533,18 @@ impl StorageManager { Ok(()) } + /// Updates some information about the underlying disk within this sled. + /// + /// Things that can currently be updated: + /// - DiskFirmware + async fn detected_raw_disk_update( + &mut self, + raw_disk: RawDisk, + ) -> Result<(), Error> { + // We aren't worried about synchronizing as the disk should already be managed. + self.resources.insert_or_update_disk(raw_disk).await + } + async fn load_ledger(&self) -> Option> { let ledger_paths = self.all_omicron_disk_ledgers().await; let log = self.log.new(o!("request" => "load_ledger")); @@ -863,7 +907,7 @@ mod tests { use crate::dataset::DatasetKind; use crate::disk::RawSyntheticDisk; use crate::manager_test_harness::StorageManagerTestHarness; - use crate::resources::DiskManagementError; + use crate::resources::{DiskManagementError, ManagedDisk}; use super::*; use camino_tempfile::tempdir_in; @@ -871,6 +915,7 @@ mod tests { use omicron_common::ledger; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::ZpoolUuid; + use sled_hardware::DiskFirmware; use std::sync::atomic::Ordering; use uuid::Uuid; @@ -1001,6 +1046,64 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn update_rawdisk_firmware() { + const FW_REV: &str = "firmware-2.0"; + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("update_u2_firmware"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + harness.handle().key_manager_ready().await; + + // Add a representative scenario for a small sled: a U.2 and M.2. + let mut raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + + // This disks should exist, but only the M.2 should have a zpool. + let all_disks_gen1 = harness.handle().get_latest_disks().await; + + for rd in &mut raw_disks { + if let RawDisk::Synthetic(ref mut disk) = rd { + let mut slots = disk.firmware.slots().to_vec(); + // "Install" a new firmware version into slot2 + slots.push(Some(FW_REV.to_string())); + disk.firmware = DiskFirmware::new( + disk.firmware.active_slot(), + disk.firmware.next_active_slot(), + disk.firmware.slot1_read_only(), + slots, + ); + } + harness.update_vdev(rd).await; + } + + let all_disks_gen2 = harness.handle().get_latest_disks().await; + + // Disks should now be different due to the mock firmware update. + assert_ne!(all_disks_gen1, all_disks_gen2); + + // Now let's verify we saw the correct firmware update. + for rd in &raw_disks { + let managed = + all_disks_gen2.values.get(rd.identity()).expect("disk exists"); + match managed { + ManagedDisk::ExplicitlyManaged(disk) + | ManagedDisk::ImplicitlyManaged(disk) => { + assert_eq!( + disk.firmware(), + rd.firmware(), + "didn't see firmware update" + ); + } + ManagedDisk::Unmanaged(disk) => { + assert_eq!(disk, rd, "didn't see firmware update"); + } + } + } + + harness.cleanup().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn wait_for_boot_disk() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); diff --git a/sled-storage/src/manager_test_harness.rs b/sled-storage/src/manager_test_harness.rs index a2180a95b53..b289b17f079 100644 --- a/sled-storage/src/manager_test_harness.rs +++ b/sled-storage/src/manager_test_harness.rs @@ -300,6 +300,15 @@ impl StorageManagerTestHarness { .expect("Failed to remove vdev"); } + // XXX MTZ: Provide a vdev update aka new firmware. + pub async fn update_vdev(&mut self, raw: &RawDisk) { + self.handle + .detected_raw_disk_update(raw.clone()) + .await + .await + .expect("Failed to update vdev"); + } + // Adds a vdev to the set of "tracked" devices. pub async fn add_vdev_as(&mut self, raw_disk: RawDisk) { self.handle diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index a2e75249b3a..89ba9514fc3 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -478,7 +478,7 @@ impl StorageResources { Ok(ManagedDisk::ExplicitlyManaged(disk)) } - /// Tracks a new disk. + /// Tracks a new disk, or updates an existing disk. /// /// For U.2s: Does not automatically attempt to manage disks -- for this, /// the caller will need to also invoke @@ -486,18 +486,45 @@ impl StorageResources { /// /// For M.2s: As no additional control plane guidance is necessary to adopt /// M.2s, these are automatically managed. - pub(crate) async fn insert_disk( + pub(crate) async fn insert_or_update_disk( &mut self, disk: RawDisk, ) -> Result<(), Error> { let disk_identity = disk.identity().clone(); info!(self.log, "Inserting disk"; "identity" => ?disk_identity); - if self.disks.values.contains_key(&disk_identity) { - info!(self.log, "Disk already exists"; "identity" => ?disk_identity); + + // XXX MTZ: Is this okay to do if we find an existing disk with no updates to apply? + let disks = Arc::make_mut(&mut self.disks.values); + + // First check if there are any updates we need to apply to existing managed disks. + if let Some(managed) = disks.get_mut(&disk_identity) { + let mut updated = false; + match managed { + ManagedDisk::ExplicitlyManaged(mdisk) + | ManagedDisk::ImplicitlyManaged(mdisk) => { + let old = RawDisk::from(mdisk.clone()); + if old != disk { + mdisk.update_disk(&disk); + updated = true; + } + } + ManagedDisk::Unmanaged(raw) => { + if raw != &disk { + *raw = disk; + updated = true; + } + } + }; + + if updated { + self.disk_updates.send_replace(self.disks.clone()); + } else { + info!(self.log, "Disk already exists and has no updates"; "identity" => ?disk_identity); + } + return Ok(()); } - let disks = Arc::make_mut(&mut self.disks.values); match disk.variant() { DiskVariant::U2 => { disks.insert(disk_identity, ManagedDisk::Unmanaged(disk));