From 1b5656200108d17cb5e497145bef117bae77bc97 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Tue, 27 Feb 2024 22:52:42 +0000 Subject: [PATCH 1/8] sled-agent needs to deal with SSD overprovisioning --- Cargo.lock | 15 ++ Cargo.toml | 1 + sled-hardware/Cargo.toml | 1 + sled-hardware/src/disk.rs | 6 +- sled-hardware/src/illumos/mod.rs | 2 +- sled-hardware/src/illumos/partitions.rs | 193 +++++++++++++++++++++++- test-utils/src/dev/mod.rs | 11 ++ 7 files changed, 225 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85b7e5a186..a77a1e17d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3858,6 +3858,20 @@ dependencies = [ "tracing", ] +[[package]] +name = "libnvme" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/libnvme?rev=6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe#6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" +dependencies = [ + "libnvme-sys", + "thiserror", +] + +[[package]] +name = "libnvme-sys" +version = "0.0.0" +source = "git+https://github.com/oxidecomputer/libnvme?rev=6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe#6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" + [[package]] name = "libsw" version = "3.3.1" @@ -8187,6 +8201,7 @@ dependencies = [ "illumos-utils", "libc", "libefi-illumos", + "libnvme", "macaddr", "omicron-common", "omicron-test-utils", diff --git a/Cargo.toml b/Cargo.toml index db37547ea0..31768de24f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -245,6 +245,7 @@ itertools = "0.12.1" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" libc = "0.2.153" +libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" } linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 3d1259f46f..24a49ae714 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -28,6 +28,7 @@ omicron-workspace-hack.workspace = true [target.'cfg(target_os = "illumos")'.dependencies] illumos-devinfo = { git = "https://github.com/oxidecomputer/illumos-devinfo", branch = "main" } libefi-illumos = { git = "https://github.com/oxidecomputer/libefi-illumos", branch = "master" } +libnvme.workspace = true [dev-dependencies] illumos-utils = { workspace = true, features = ["testing"] } diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 44658658be..6a4c968f14 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -38,6 +38,8 @@ pub enum PooledDiskError { CannotFormatMissingDevPath { path: Utf8PathBuf }, #[error("Formatting M.2 devices is not yet implemented")] CannotFormatM2NotImplemented, + #[error(transparent)] + NvmeFormatAndResize(#[from] NvmeFormattingError), } /// A partition (or 'slice') of a disk. @@ -196,9 +198,11 @@ impl PooledDisk { ) -> Result { let paths = &unparsed_disk.paths; let variant = unparsed_disk.variant; + let identity = unparsed_disk.identity(); // Ensure the GPT has the right format. This does not necessarily // mean that the partitions are populated with the data we need. - let partitions = ensure_partition_layout(&log, &paths, variant)?; + let partitions = + ensure_partition_layout(&log, &paths, variant, identity)?; // Find the path to the zpool which exists on this disk. // diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index ebc9a9c2b0..f2db424bda 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -26,7 +26,7 @@ mod gpt; mod partitions; mod sysconf; -pub use partitions::ensure_partition_layout; +pub use partitions::{ensure_partition_layout, NvmeFormattingError}; #[derive(thiserror::Error, Debug)] enum Error { diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 29b2466ad9..1d89a76a02 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -4,10 +4,14 @@ //! illumos-specific mechanisms for parsing disk info. +use std::collections::HashMap; +use std::sync::OnceLock; + use crate::illumos::gpt; use crate::{DiskPaths, DiskVariant, Partition, PooledDiskError}; use camino::Utf8Path; use illumos_utils::zpool::ZpoolName; +use omicron_common::disk::DiskIdentity; use slog::info; use slog::Logger; use uuid::Uuid; @@ -17,6 +21,64 @@ use illumos_utils::zpool::MockZpool as Zpool; #[cfg(not(test))] use illumos_utils::zpool::Zpool; +struct NvmeDeviceSettings { + size: u32, + lba_meta_size: u32, + lba_data_size: u64, +} + +static PREFERRED_NVME_DEVICE_FORMATS: OnceLock< + HashMap<&'static str, NvmeDeviceSettings>, +> = OnceLock::new(); + +fn preferred_nvme_device_settings( +) -> &'static HashMap<&'static str, NvmeDeviceSettings> { + PREFERRED_NVME_DEVICE_FORMATS.get_or_init(|| { + HashMap::from([ + ( + "WUS4C6432DSP3X3", + NvmeDeviceSettings { + size: 3200, + lba_meta_size: 0, + lba_data_size: 512, + }, + ), + ( + "NEWMODELHERE", + NvmeDeviceSettings { + size: 3200, + lba_meta_size: 0, + lba_data_size: 512, + }, + ), + ]) + }) +} + +#[derive(Debug, thiserror::Error)] +#[cfg(target_os = "illumos")] +pub enum NvmeFormattingError { + #[error(transparent)] + NvmeInit(#[from] libnvme::NvmeInitError), + #[error(transparent)] + Nvme(#[from] libnvme::NvmeError), + #[error("Device is missing expected LBA format")] + LbaFormatMissing, + #[error("Device has {0} active namespaces but we expected 1")] + UnexpectedNamespaces(usize), + #[error(transparent)] + InfoError(#[from] libnvme::controller_info::NvmeInfoError), + #[error("Could not find NVMe controller for disk with serial {0}")] + NoController(String), +} + +#[derive(Debug, thiserror::Error)] +#[cfg(not(target_os = "illumos"))] +pub enum NvmeFormattingError { + #[error("NVMe formatting is unsupported on this platform")] + UnsupportedPlatform, +} + // The expected layout of an M.2 device within the Oxide rack. // // Partitions beyond this "expected partition" array are ignored. @@ -79,8 +141,11 @@ pub fn ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, + identity: &DiskIdentity, ) -> Result, PooledDiskError> { - internal_ensure_partition_layout::(log, paths, variant) + internal_ensure_partition_layout::( + log, paths, variant, identity, + ) } // Same as the [ensure_partition_layout], but with generic parameters @@ -89,6 +154,7 @@ fn internal_ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, + identity: &DiskIdentity, ) -> Result, PooledDiskError> { // Open the "Whole Disk" as a raw device to be parsed by the // libefi-illumos library. This lets us peek at the GPT before @@ -119,6 +185,11 @@ fn internal_ensure_partition_layout( }; match variant { DiskVariant::U2 => { + // First we need to check that this disk is of the proper + // size and correct logical block address formatting. + ensure_size_and_formatting(log, identity)?; + + // If we were successful we can create a zpool on this disk. info!(log, "Formatting zpool on disk {}", paths.devfs_path); // If a zpool does not already exist, create one. let zpool_name = ZpoolName::new_external(Uuid::new_v4()); @@ -154,13 +225,124 @@ fn internal_ensure_partition_layout( } } +#[cfg(target_os = "illumos")] +fn ensure_size_and_formatting( + log: &Logger, + identity: &DiskIdentity, +) -> Result<(), NvmeFormattingError> { + use libnvme::namespace::NamespaceDiscoveryLevel; + use libnvme::Nvme; + + let mut controller_found = false; + + if let Some(nvme_settings) = + preferred_nvme_device_settings().get(identity.model.as_str()) + { + let nvme = Nvme::new()?; + for controller in nvme.controller_discovery()?.into_iter() { + let controller = controller?.write_lock().map_err(|(_, e)| e)?; + let controller_info = controller.get_info()?; + // Make sure we are operating on the correct NVMe device. + if controller_info.serial() != identity.serial { + continue; + }; + controller_found = true; + let nsdisc = controller + .namespace_discovery(NamespaceDiscoveryLevel::Active)?; + let namespaces = + nsdisc.into_iter().collect::, _>>()?; + if namespaces.len() != 1 { + return Err(NvmeFormattingError::UnexpectedNamespaces( + namespaces.len(), + )); + } + + // TODO we need to abstract this detail awway somehow as not all + // devices will be WD + let size = controller.wdc_resize_get()?; + + // First we need to detach blkdev from the namespace + namespaces.iter().try_for_each(|ns| ns.blkdev_detach())?; + + // Resize the device if needed to ensure we get the expected + // durability level in terms of drive writes per day + if size != nvme_settings.size { + // TODO this also needs to be abstracted away + controller.wdc_resize_set(nvme_settings.size)?; + info!( + log, + "Resized {} from {size} to {}", + identity.serial, + nvme_settings.size + ) + } + + // Find the LBA format we want to use for the device + let lba = controller_info + .lba_formats() + .into_iter() + .collect::, _>>()? + .into_iter() + .find(|lba| { + lba.meta_size() == nvme_settings.lba_meta_size + && lba.data_size() == nvme_settings.lba_data_size + }) + .ok_or_else(|| NvmeFormattingError::LbaFormatMissing)? + .id(); + + // Execute the request to format the device + controller + .format_request()? + .set_lbaf(lba)? + // TODO map this to libnvme::BROADCAST_NAMESPACE once added + .set_nsid(u32::MAX)? + // No secure erase + .set_ses(0)? + .execute()?; + + // Attach blkdev to the namespace again + namespaces.iter().try_for_each(|ns| ns.blkdev_attach())?; + info!( + log, + "Formatted {} using LBA with data size of {}", + identity.serial, + nvme_settings.lba_data_size + ); + } + } else { + // XXX should this be a an error? + info!( + log, + "There is no preferred NVMe setting for disk model {}; nothing to\ + do for disk with serial {}", + identity.model, + identity.serial + ); + return Ok(()); + } + + if !controller_found { + return Err(NvmeFormattingError::NoController(identity.serial.clone())); + } + + Ok(()) +} + +#[cfg(not(target_os = "illumos"))] +fn ensure_size_and_formatting( + log: &Logger, + identity: &DiskIdentity, +) -> Result<(), NvmeFormattingError> { + Ok(()) +} + #[cfg(test)] mod test { use super::*; use crate::DiskPaths; use camino::Utf8PathBuf; use illumos_utils::zpool::MockZpool; - use omicron_test_utils::dev::test_setup_log; + use omicron_test_utils::dev::{mock_device_identity, test_setup_log}; use std::path::Path; struct FakePartition { @@ -196,6 +378,7 @@ mod test { &log, &DiskPaths { devfs_path, dev_path: None }, DiskVariant::U2, + &mock_device_identity(), ); match result { Err(PooledDiskError::CannotFormatMissingDevPath { .. }) => {} @@ -229,6 +412,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::U2, + &mock_device_identity(), ) .expect("Should have succeeded partitioning disk"); @@ -253,6 +437,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)) }, DiskVariant::M2, + &mock_device_identity(), ) .is_err()); @@ -290,6 +475,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::U2, + &mock_device_identity(), ) .expect("Should be able to parse disk"); @@ -332,6 +518,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::M2, + &mock_device_identity(), ) .expect("Should be able to parse disk"); @@ -371,6 +558,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::M2, + &mock_device_identity(), ) .expect_err("Should have failed parsing empty GPT"), PooledDiskError::BadPartitionLayout { .. } @@ -396,6 +584,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::U2, + &mock_device_identity(), ) .expect_err("Should have failed parsing empty GPT"), PooledDiskError::BadPartitionLayout { .. } diff --git a/test-utils/src/dev/mod.rs b/test-utils/src/dev/mod.rs index e29da9c51e..f18e3eade1 100644 --- a/test-utils/src/dev/mod.rs +++ b/test-utils/src/dev/mod.rs @@ -20,6 +20,7 @@ pub use dropshot::test_util::LogContext; use dropshot::ConfigLogging; use dropshot::ConfigLoggingIfExists; use dropshot::ConfigLoggingLevel; +use omicron_common::disk::DiskIdentity; use slog::Logger; use std::io::BufReader; @@ -145,3 +146,13 @@ pub fn process_running(pid: u32) -> bool { // only checks whether the process is running. 0 == (unsafe { libc::kill(pid as libc::pid_t, 0) }) } + +/// Returns a DiskIdentity that can be passed to ensure_partition_layout when +/// not operating on a real disk. +pub fn mock_device_identity() -> DiskIdentity { + DiskIdentity { + vendor: "MockVendor".to_string(), + serial: "MOCKSERIAL".to_string(), + model: "MOCKMODEL".to_string(), + } +} From 1d9cfeed44935ed27627cb4063504ff2dfceeda1 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Wed, 28 Feb 2024 01:55:33 +0000 Subject: [PATCH 2/8] Fix non illumos support --- sled-hardware/src/illumos/partitions.rs | 17 ----------------- sled-hardware/src/non_illumos/mod.rs | 8 ++++++++ 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 1d89a76a02..5c129b410b 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -56,7 +56,6 @@ fn preferred_nvme_device_settings( } #[derive(Debug, thiserror::Error)] -#[cfg(target_os = "illumos")] pub enum NvmeFormattingError { #[error(transparent)] NvmeInit(#[from] libnvme::NvmeInitError), @@ -72,13 +71,6 @@ pub enum NvmeFormattingError { NoController(String), } -#[derive(Debug, thiserror::Error)] -#[cfg(not(target_os = "illumos"))] -pub enum NvmeFormattingError { - #[error("NVMe formatting is unsupported on this platform")] - UnsupportedPlatform, -} - // The expected layout of an M.2 device within the Oxide rack. // // Partitions beyond this "expected partition" array are ignored. @@ -225,7 +217,6 @@ fn internal_ensure_partition_layout( } } -#[cfg(target_os = "illumos")] fn ensure_size_and_formatting( log: &Logger, identity: &DiskIdentity, @@ -328,14 +319,6 @@ fn ensure_size_and_formatting( Ok(()) } -#[cfg(not(target_os = "illumos"))] -fn ensure_size_and_formatting( - log: &Logger, - identity: &DiskIdentity, -) -> Result<(), NvmeFormattingError> { - Ok(()) -} - #[cfg(test)] mod test { use super::*; diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index d8372dd8aa..8518aae495 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -6,10 +6,17 @@ use crate::disk::{ DiskPaths, DiskVariant, Partition, PooledDiskError, UnparsedDisk, }; use crate::{Baseboard, SledMode}; +use omicron_common::disk::DiskIdentity; use slog::Logger; use std::collections::HashSet; use tokio::sync::broadcast; +#[derive(Debug, thiserror::Error)] +pub enum NvmeFormattingError { + #[error("NVMe formatting is unsupported on this platform")] + UnsupportedPlatform, +} + /// An unimplemented, stub representation of the underlying hardware. /// /// This is intended for non-illumos systems to have roughly the same interface @@ -59,6 +66,7 @@ pub fn ensure_partition_layout( _log: &Logger, _paths: &DiskPaths, _variant: DiskVariant, + _identity: &DiskIdentity, ) -> Result, PooledDiskError> { unimplemented!("Accessing hardware unsupported on non-illumos"); } From e802cacbd3a2c3e4ecc9797d5ccbb35c916e9e5d Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Wed, 28 Feb 2024 22:24:54 +0000 Subject: [PATCH 3/8] Update lookup table to match real devices --- sled-hardware/src/illumos/partitions.rs | 101 ++++++++++++------------ 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 5c129b410b..9ae84662bd 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -21,36 +21,29 @@ use illumos_utils::zpool::MockZpool as Zpool; #[cfg(not(test))] use illumos_utils::zpool::Zpool; +// We always want the device to be formatted with 4k data and 0 byte metadata. +// In the future if we want to override these values we can introduce an +// optional value in `NvmeDeviceSettings`. +static DEFAULT_NVME_LBA_META_SIZE: u32 = 0; +static DEFAULT_NVME_LBA_DATA_SIZE: u64 = 4096; + struct NvmeDeviceSettings { size: u32, - lba_meta_size: u32, - lba_data_size: u64, } -static PREFERRED_NVME_DEVICE_FORMATS: OnceLock< +static PREFERRED_NVME_DEVICE_SETTINGS: OnceLock< HashMap<&'static str, NvmeDeviceSettings>, > = OnceLock::new(); fn preferred_nvme_device_settings( ) -> &'static HashMap<&'static str, NvmeDeviceSettings> { - PREFERRED_NVME_DEVICE_FORMATS.get_or_init(|| { + PREFERRED_NVME_DEVICE_SETTINGS.get_or_init(|| { HashMap::from([ - ( - "WUS4C6432DSP3X3", - NvmeDeviceSettings { - size: 3200, - lba_meta_size: 0, - lba_data_size: 512, - }, - ), - ( - "NEWMODELHERE", - NvmeDeviceSettings { - size: 3200, - lba_meta_size: 0, - lba_data_size: 512, - }, - ), + ("WUS4C6432DSP3X3", NvmeDeviceSettings { size: 3200 }), + ("WUS5EA138ESP7E1", NvmeDeviceSettings { size: 3200 }), + ("WUS5EA138ESP7E3", NvmeDeviceSettings { size: 3200 }), + ("WUS5EA176ESP7E1", NvmeDeviceSettings { size: 6400 }), + ("WUS5EA176ESP7E3", NvmeDeviceSettings { size: 6400 }), ]) }) } @@ -247,18 +240,20 @@ fn ensure_size_and_formatting( namespaces.len(), )); } + // Safe because verified there is exactly one namespace. + let namespace = namespaces.into_iter().next().unwrap(); - // TODO we need to abstract this detail awway somehow as not all - // devices will be WD + // TODO we need to abstract this detail awway if/when future devices + // are not from WDC. let size = controller.wdc_resize_get()?; - // First we need to detach blkdev from the namespace - namespaces.iter().try_for_each(|ns| ns.blkdev_detach())?; + // First we need to detach blkdev from the namespace. + namespace.blkdev_detach()?; // Resize the device if needed to ensure we get the expected - // durability level in terms of drive writes per day + // durability level in terms of drive writes per day. if size != nvme_settings.size { - // TODO this also needs to be abstracted away + // TODO this also needs to be abstracted away. controller.wdc_resize_set(nvme_settings.size)?; info!( log, @@ -268,43 +263,47 @@ fn ensure_size_and_formatting( ) } - // Find the LBA format we want to use for the device - let lba = controller_info + // Find the LBA format we want to use for the device. + let desired_lba = controller_info .lba_formats() .into_iter() .collect::, _>>()? .into_iter() .find(|lba| { - lba.meta_size() == nvme_settings.lba_meta_size - && lba.data_size() == nvme_settings.lba_data_size + lba.meta_size() == DEFAULT_NVME_LBA_META_SIZE + && lba.data_size() == DEFAULT_NVME_LBA_DATA_SIZE }) - .ok_or_else(|| NvmeFormattingError::LbaFormatMissing)? - .id(); - - // Execute the request to format the device - controller - .format_request()? - .set_lbaf(lba)? - // TODO map this to libnvme::BROADCAST_NAMESPACE once added - .set_nsid(u32::MAX)? - // No secure erase - .set_ses(0)? - .execute()?; + .ok_or_else(|| NvmeFormattingError::LbaFormatMissing)?; + + // If the controller isn't formatted to our desired LBA we need to + // issue a format request. + let ns_info = namespace.get_info()?; + let current_lba = ns_info.current_format()?; + if current_lba.id() != desired_lba.id() { + controller + .format_request()? + .set_lbaf(desired_lba.id())? + // TODO map this to libnvme::BROADCAST_NAMESPACE once added + .set_nsid(u32::MAX)? + // No secure erase + .set_ses(0)? + .execute()?; + + info!( + log, + "Formatted {} using LBA with data size of {}", + identity.serial, + DEFAULT_NVME_LBA_DATA_SIZE + ); + } // Attach blkdev to the namespace again - namespaces.iter().try_for_each(|ns| ns.blkdev_attach())?; - info!( - log, - "Formatted {} using LBA with data size of {}", - identity.serial, - nvme_settings.lba_data_size - ); + namespace.blkdev_attach()?; } } else { - // XXX should this be a an error? info!( log, - "There is no preferred NVMe setting for disk model {}; nothing to\ + "There are no preferred NVMe settings for disk model {}; nothing to\ do for disk with serial {}", identity.model, identity.serial From e7083271b0e5e3931ba5114851564fbbfa5902c0 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Wed, 28 Feb 2024 22:55:04 +0000 Subject: [PATCH 4/8] NvmeDeviceSettings cleanup --- sled-hardware/src/illumos/partitions.rs | 82 ++++++++++++++++++------- 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 9ae84662bd..9357fee474 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -21,14 +21,18 @@ use illumos_utils::zpool::MockZpool as Zpool; #[cfg(not(test))] use illumos_utils::zpool::Zpool; -// We always want the device to be formatted with 4k data and 0 byte metadata. -// In the future if we want to override these values we can introduce an -// optional value in `NvmeDeviceSettings`. -static DEFAULT_NVME_LBA_META_SIZE: u32 = 0; +/// NVMe devices use a meta size of 0 as we don't support writing addditional +/// metadata +static NVME_LBA_META_SIZE: u32 = 0; +/// NVMe devices default to using 4k logical block addressing unless overriden. static DEFAULT_NVME_LBA_DATA_SIZE: u64 = 4096; +/// NVMe device settings for a particular NVMe model. struct NvmeDeviceSettings { - size: u32, + /// The desired disk size for dealing with overprovisioning. + size: Option, + /// An override for the default 4k LBA formatting. + lba_data_size_override: Option, } static PREFERRED_NVME_DEVICE_SETTINGS: OnceLock< @@ -39,11 +43,40 @@ fn preferred_nvme_device_settings( ) -> &'static HashMap<&'static str, NvmeDeviceSettings> { PREFERRED_NVME_DEVICE_SETTINGS.get_or_init(|| { HashMap::from([ - ("WUS4C6432DSP3X3", NvmeDeviceSettings { size: 3200 }), - ("WUS5EA138ESP7E1", NvmeDeviceSettings { size: 3200 }), - ("WUS5EA138ESP7E3", NvmeDeviceSettings { size: 3200 }), - ("WUS5EA176ESP7E1", NvmeDeviceSettings { size: 6400 }), - ("WUS5EA176ESP7E3", NvmeDeviceSettings { size: 6400 }), + // This disk ships with a size of 3200 from the factory, but we + // still wish to apply 4K LBA formatting. + ( + "WUS4C6432DSP3X3", + NvmeDeviceSettings { size: None, lba_data_size_override: None }, + ), + ( + "WUS5EA138ESP7E1", + NvmeDeviceSettings { + size: Some(3200), + lba_data_size_override: None, + }, + ), + ( + "WUS5EA138ESP7E3", + NvmeDeviceSettings { + size: Some(3200), + lba_data_size_override: None, + }, + ), + ( + "WUS5EA176ESP7E1", + NvmeDeviceSettings { + size: Some(6400), + lba_data_size_override: None, + }, + ), + ( + "WUS5EA176ESP7E3", + NvmeDeviceSettings { + size: Some(6400), + lba_data_size_override: None, + }, + ), ]) }) } @@ -252,26 +285,30 @@ fn ensure_size_and_formatting( // Resize the device if needed to ensure we get the expected // durability level in terms of drive writes per day. - if size != nvme_settings.size { - // TODO this also needs to be abstracted away. - controller.wdc_resize_set(nvme_settings.size)?; - info!( - log, - "Resized {} from {size} to {}", - identity.serial, - nvme_settings.size - ) + if let Some(wanted_size) = nvme_settings.size { + if size != wanted_size { + // TODO this also needs to be abstracted away. + controller.wdc_resize_set(wanted_size)?; + info!( + log, + "Resized {} from {size} to {wanted_size}", + identity.serial, + ) + } } // Find the LBA format we want to use for the device. + let wanted_data_size = nvme_settings + .lba_data_size_override + .unwrap_or(DEFAULT_NVME_LBA_DATA_SIZE); let desired_lba = controller_info .lba_formats() .into_iter() .collect::, _>>()? .into_iter() .find(|lba| { - lba.meta_size() == DEFAULT_NVME_LBA_META_SIZE - && lba.data_size() == DEFAULT_NVME_LBA_DATA_SIZE + lba.meta_size() == NVME_LBA_META_SIZE + && lba.data_size() == wanted_data_size }) .ok_or_else(|| NvmeFormattingError::LbaFormatMissing)?; @@ -291,9 +328,8 @@ fn ensure_size_and_formatting( info!( log, - "Formatted {} using LBA with data size of {}", + "Formatted {} using LBA with data size of {wanted_data_size}", identity.serial, - DEFAULT_NVME_LBA_DATA_SIZE ); } From da05bdb169d2f23b49e0762a716813687bf592d2 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Wed, 6 Mar 2024 21:35:13 +0000 Subject: [PATCH 5/8] Cleanup comment about WDC specific features --- sled-hardware/src/illumos/partitions.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 9357fee474..06897dec04 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -276,8 +276,10 @@ fn ensure_size_and_formatting( // Safe because verified there is exactly one namespace. let namespace = namespaces.into_iter().next().unwrap(); - // TODO we need to abstract this detail awway if/when future devices - // are not from WDC. + // NB: Only some vendors such as WDC support adjusting the size + // of the disk to deal with overprovisioning. This will need to be + // abstracted away if/when we ever start using another vendor with + // this capability. let size = controller.wdc_resize_get()?; // First we need to detach blkdev from the namespace. @@ -287,7 +289,6 @@ fn ensure_size_and_formatting( // durability level in terms of drive writes per day. if let Some(wanted_size) = nvme_settings.size { if size != wanted_size { - // TODO this also needs to be abstracted away. controller.wdc_resize_set(wanted_size)?; info!( log, @@ -328,7 +329,8 @@ fn ensure_size_and_formatting( info!( log, - "Formatted {} using LBA with data size of {wanted_data_size}", + "Formatted disk with serial {} to an LBA with data size \ + {wanted_data_size}", identity.serial, ); } @@ -340,7 +342,7 @@ fn ensure_size_and_formatting( info!( log, "There are no preferred NVMe settings for disk model {}; nothing to\ - do for disk with serial {}", + do for disk with serial {}", identity.model, identity.serial ); From 22745be2245d3aece93a609a6c9323950b69d999 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Thu, 7 Mar 2024 15:52:43 +0000 Subject: [PATCH 6/8] Clippy lints --- sled-hardware/src/illumos/partitions.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 06897dec04..151f7b4208 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -256,7 +256,7 @@ fn ensure_size_and_formatting( preferred_nvme_device_settings().get(identity.model.as_str()) { let nvme = Nvme::new()?; - for controller in nvme.controller_discovery()?.into_iter() { + for controller in nvme.controller_discovery()? { let controller = controller?.write_lock().map_err(|(_, e)| e)?; let controller_info = controller.get_info()?; // Make sure we are operating on the correct NVMe device. @@ -304,7 +304,6 @@ fn ensure_size_and_formatting( .unwrap_or(DEFAULT_NVME_LBA_DATA_SIZE); let desired_lba = controller_info .lba_formats() - .into_iter() .collect::, _>>()? .into_iter() .find(|lba| { From 301037016928dcb8a3cc796532e69d7f148e6439 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Thu, 7 Mar 2024 22:29:56 +0000 Subject: [PATCH 7/8] Be explicit about the device size --- sled-hardware/src/illumos/partitions.rs | 45 +++++++++---------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 151f7b4208..54dcca7a9e 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -30,11 +30,13 @@ static DEFAULT_NVME_LBA_DATA_SIZE: u64 = 4096; /// NVMe device settings for a particular NVMe model. struct NvmeDeviceSettings { /// The desired disk size for dealing with overprovisioning. - size: Option, + size: u32, /// An override for the default 4k LBA formatting. lba_data_size_override: Option, } +/// A mapping from model to desired settings. +/// A device not found in this lookup table will not be modified by sled-agent. static PREFERRED_NVME_DEVICE_SETTINGS: OnceLock< HashMap<&'static str, NvmeDeviceSettings>, > = OnceLock::new(); @@ -43,39 +45,25 @@ fn preferred_nvme_device_settings( ) -> &'static HashMap<&'static str, NvmeDeviceSettings> { PREFERRED_NVME_DEVICE_SETTINGS.get_or_init(|| { HashMap::from([ - // This disk ships with a size of 3200 from the factory, but we - // still wish to apply 4K LBA formatting. ( "WUS4C6432DSP3X3", - NvmeDeviceSettings { size: None, lba_data_size_override: None }, + NvmeDeviceSettings { size: 3200, lba_data_size_override: None }, ), ( "WUS5EA138ESP7E1", - NvmeDeviceSettings { - size: Some(3200), - lba_data_size_override: None, - }, + NvmeDeviceSettings { size: 3200, lba_data_size_override: None }, ), ( "WUS5EA138ESP7E3", - NvmeDeviceSettings { - size: Some(3200), - lba_data_size_override: None, - }, + NvmeDeviceSettings { size: 3200, lba_data_size_override: None }, ), ( "WUS5EA176ESP7E1", - NvmeDeviceSettings { - size: Some(6400), - lba_data_size_override: None, - }, + NvmeDeviceSettings { size: 6400, lba_data_size_override: None }, ), ( "WUS5EA176ESP7E3", - NvmeDeviceSettings { - size: Some(6400), - lba_data_size_override: None, - }, + NvmeDeviceSettings { size: 6400, lba_data_size_override: None }, ), ]) }) @@ -287,15 +275,14 @@ fn ensure_size_and_formatting( // Resize the device if needed to ensure we get the expected // durability level in terms of drive writes per day. - if let Some(wanted_size) = nvme_settings.size { - if size != wanted_size { - controller.wdc_resize_set(wanted_size)?; - info!( - log, - "Resized {} from {size} to {wanted_size}", - identity.serial, - ) - } + if size != nvme_settings.size { + controller.wdc_resize_set(nvme_settings.size)?; + info!( + log, + "Resized {} from {size} to {}", + identity.serial, + nvme_settings.size + ) } // Find the LBA format we want to use for the device. From ea09e03dbe4a1467aea84afa6d37885ad58b06fe Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Mon, 11 Mar 2024 15:51:01 +0000 Subject: [PATCH 8/8] nit: mock_device to mock_disk --- sled-hardware/src/illumos/partitions.rs | 16 ++++++++-------- test-utils/src/dev/mod.rs | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 54dcca7a9e..3b8e0af2ee 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -348,7 +348,7 @@ mod test { use crate::DiskPaths; use camino::Utf8PathBuf; use illumos_utils::zpool::MockZpool; - use omicron_test_utils::dev::{mock_device_identity, test_setup_log}; + use omicron_test_utils::dev::{mock_disk_identity, test_setup_log}; use std::path::Path; struct FakePartition { @@ -384,7 +384,7 @@ mod test { &log, &DiskPaths { devfs_path, dev_path: None }, DiskVariant::U2, - &mock_device_identity(), + &mock_disk_identity(), ); match result { Err(PooledDiskError::CannotFormatMissingDevPath { .. }) => {} @@ -418,7 +418,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::U2, - &mock_device_identity(), + &mock_disk_identity(), ) .expect("Should have succeeded partitioning disk"); @@ -443,7 +443,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)) }, DiskVariant::M2, - &mock_device_identity(), + &mock_disk_identity(), ) .is_err()); @@ -481,7 +481,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::U2, - &mock_device_identity(), + &mock_disk_identity(), ) .expect("Should be able to parse disk"); @@ -524,7 +524,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::M2, - &mock_device_identity(), + &mock_disk_identity(), ) .expect("Should be able to parse disk"); @@ -564,7 +564,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::M2, - &mock_device_identity(), + &mock_disk_identity(), ) .expect_err("Should have failed parsing empty GPT"), PooledDiskError::BadPartitionLayout { .. } @@ -590,7 +590,7 @@ mod test { dev_path: Some(Utf8PathBuf::from(DEV_PATH)), }, DiskVariant::U2, - &mock_device_identity(), + &mock_disk_identity(), ) .expect_err("Should have failed parsing empty GPT"), PooledDiskError::BadPartitionLayout { .. } diff --git a/test-utils/src/dev/mod.rs b/test-utils/src/dev/mod.rs index f18e3eade1..705d6ff08c 100644 --- a/test-utils/src/dev/mod.rs +++ b/test-utils/src/dev/mod.rs @@ -149,7 +149,7 @@ pub fn process_running(pid: u32) -> bool { /// Returns a DiskIdentity that can be passed to ensure_partition_layout when /// not operating on a real disk. -pub fn mock_device_identity() -> DiskIdentity { +pub fn mock_disk_identity() -> DiskIdentity { DiskIdentity { vendor: "MockVendor".to_string(), serial: "MOCKSERIAL".to_string(),