diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 642a4fb31b..6bdc12d052 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -58,6 +58,7 @@ use nexus_db_model::HwBaseboardId; use nexus_db_model::Image; use nexus_db_model::Instance; use nexus_db_model::InvCollection; +use nexus_db_model::InvNvmeDiskFirmware; use nexus_db_model::InvPhysicalDisk; use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; @@ -4317,36 +4318,62 @@ async fn cmd_db_inventory_physical_disks( model: String, serial: String, variant: String, + firmware: String, + next_firmware: String, } - use db::schema::inv_physical_disk::dsl; - let mut query = dsl::inv_physical_disk.into_boxed(); + use db::schema::inv_nvme_disk_firmware::dsl as firmware_dsl; + use db::schema::inv_physical_disk::dsl as disk_dsl; + + let mut query = disk_dsl::inv_physical_disk.into_boxed(); query = query.limit(i64::from(u32::from(limit))); if let Some(collection_id) = args.collection_id { query = query.filter( - dsl::inv_collection_id.eq(collection_id.into_untyped_uuid()), + disk_dsl::inv_collection_id.eq(collection_id.into_untyped_uuid()), ); } if let Some(sled_id) = args.sled_id { - query = query.filter(dsl::sled_id.eq(sled_id.into_untyped_uuid())); + query = query.filter(disk_dsl::sled_id.eq(sled_id.into_untyped_uuid())); } let disks = query - .select(InvPhysicalDisk::as_select()) + .left_join( + firmware_dsl::inv_nvme_disk_firmware.on( + firmware_dsl::inv_collection_id + .eq(disk_dsl::inv_collection_id) + .and(firmware_dsl::sled_id.eq(disk_dsl::sled_id)) + .and(firmware_dsl::slot.eq(disk_dsl::slot)), + ), + ) + .select(( + InvPhysicalDisk::as_select(), + Option::::as_select(), + )) .load_async(&**conn) .await .context("loading physical disks")?; - let rows = disks.into_iter().map(|disk| DiskRow { - inv_collection_id: disk.inv_collection_id.into_untyped_uuid(), - sled_id: disk.sled_id.into_untyped_uuid(), - slot: disk.slot, - vendor: 
disk.vendor, - model: disk.model.clone(), - serial: disk.serial.clone(), - variant: format!("{:?}", disk.variant), + let rows = disks.into_iter().map(|(disk, firmware)| { + let (active_firmware, next_firmware) = + if let Some(firmware) = firmware.as_ref() { + (firmware.current_version(), firmware.next_version()) + } else { + (None, None) + }; + + DiskRow { + inv_collection_id: disk.inv_collection_id.into_untyped_uuid(), + sled_id: disk.sled_id.into_untyped_uuid(), + slot: disk.slot, + vendor: disk.vendor, + model: disk.model.clone(), + serial: disk.serial.clone(), + variant: format!("{:?}", disk.variant), + firmware: active_firmware.unwrap_or("UNKNOWN").to_string(), + next_firmware: next_firmware.unwrap_or("").to_string(), + } }); let table = tabled::Table::new(rows) @@ -4752,6 +4779,7 @@ fn inv_collection_print_sleds(collection: &Collection) { identity, variant, slot, + .. } = disk; println!(" {variant:?}: {identity:?} in {slot}"); } diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index d695f3a14b..a0bc851db4 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -30,6 +30,14 @@ pub struct InventoryDisk { pub identity: omicron_common::disk::DiskIdentity, pub variant: DiskVariant, pub slot: i64, + // Today we only have NVMe disks so we embedded the firmware metadata here. + // In the future we can track firmware metadata in a unique type if we + // support more than one disk format. 
+ pub active_firmware_slot: u8, + pub next_active_firmware_slot: Option, + pub number_of_firmware_slots: u8, + pub slot1_is_read_only: bool, + pub slot_firmware_versions: Vec>, } /// Identifies information about zpools managed by the control plane diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index d5754965f2..ff2132e58c 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -7,10 +7,10 @@ use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::schema::{ hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error, - inv_dataset, inv_omicron_zone, inv_omicron_zone_nic, inv_physical_disk, - inv_root_of_trust, inv_root_of_trust_page, inv_service_processor, - inv_sled_agent, inv_sled_omicron_zones, inv_zpool, sw_caboose, - sw_root_of_trust_page, + inv_dataset, inv_nvme_disk_firmware, inv_omicron_zone, + inv_omicron_zone_nic, inv_physical_disk, inv_root_of_trust, + inv_root_of_trust_page, inv_service_processor, inv_sled_agent, + inv_sled_omicron_zones, inv_zpool, sw_caboose, sw_root_of_trust_page, }; use crate::typed_uuid::DbTypedUuid; use crate::PhysicalDiskKind; @@ -33,7 +33,8 @@ use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::inventory::{ - BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, + BaseboardId, Caboose, Collection, NvmeFirmware, PowerState, RotPage, + RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::zpool_name::ZpoolName; @@ -885,16 +886,61 @@ impl InvPhysicalDisk { } } -impl From for nexus_types::inventory::PhysicalDisk { - fn from(disk: InvPhysicalDisk) -> Self { +/// See [`nexus_types::inventory::PhysicalDiskFirmware::Nvme`]. 
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_nvme_disk_firmware)] +pub struct InvNvmeDiskFirmware { + pub inv_collection_id: DbTypedUuid, + pub sled_id: DbTypedUuid, + pub slot: i64, + pub active_slot: SqlU8, + pub next_active_slot: Option, + pub number_of_slots: SqlU8, + pub slot1_is_read_only: bool, + pub slot_firmware_versions: Vec>, +} + +impl InvNvmeDiskFirmware { + pub fn new( + inv_collection_id: CollectionUuid, + sled_id: SledUuid, + sled_slot: i64, + firmware: &NvmeFirmware, + ) -> Self { Self { - identity: omicron_common::disk::DiskIdentity { - vendor: disk.vendor, - serial: disk.serial, - model: disk.model, - }, - variant: disk.variant.into(), - slot: disk.slot, + inv_collection_id: inv_collection_id.into(), + sled_id: sled_id.into(), + slot: sled_slot, + active_slot: firmware.active_slot.into(), + next_active_slot: firmware.next_active_slot.map(|nas| nas.into()), + number_of_slots: firmware.number_of_slots.into(), + slot1_is_read_only: firmware.slot1_is_read_only, + slot_firmware_versions: firmware.slot_firmware_versions.clone(), + } + } + + /// Attempt to read the current firmware version. + pub fn current_version(&self) -> Option<&str> { + match self.active_slot.0 { + // be paranoid that we have a value within the NVMe spec + slot @ 1..=7 => self + .slot_firmware_versions + .get(usize::from(slot) - 1) + .and_then(|v| v.as_deref()), + _ => None, + } + } + + /// Attempt to read the staged firmware version that will be active upon + /// next device reset. 
+ pub fn next_version(&self) -> Option<&str> { + match self.next_active_slot { + // be paranoid that we have a value within the NVMe spec + Some(slot) if slot.0 <= 7 && slot.0 >= 1 => self + .slot_firmware_versions + .get(usize::from(slot.0) - 1) + .and_then(|v| v.as_deref()), + _ => None, } } } @@ -1551,3 +1597,105 @@ impl InvOmicronZoneNic { zone_nic.into_network_interface_for_zone(zone_id) } } + +#[cfg(test)] +mod test { + use omicron_uuid_kinds::TypedUuid; + + use crate::{typed_uuid, InvNvmeDiskFirmware}; + + #[test] + fn test_inv_nvme_disk_firmware() { + // We can retrieve active_firmware with sane values + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: None, + number_of_slots: 1.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("firmware".to_string())], + }; + assert_eq!(inv_firmware.current_version(), Some("firmware")); + assert_eq!(inv_firmware.next_version(), None); + + // We can retrieve active_firmware and next_active_firmware with sane + // values + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: Some(2.into()), + number_of_slots: 2.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![ + Some("ONE".to_string()), + Some("TWO".to_string()), + ], + }; + assert_eq!(inv_firmware.current_version(), Some("ONE")); + assert_eq!(inv_firmware.next_version(), Some("TWO")); + + // A missing fw version string returns None (although technically + // invalid as a device _shouldn't_ report an empty slot as being + // activated upon reset) + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + 
next_active_slot: Some(2.into()), + number_of_slots: 2.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("ONE".to_string()), None], + }; + assert_eq!(inv_firmware.current_version(), Some("ONE")); + assert_eq!(inv_firmware.next_version(), None); + + // Number of slots and slot firmware versions disagreement returns None + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: Some(4.into()), + number_of_slots: 4.into(), + slot1_is_read_only: true, + // number_of_slots reports 4 but the device only gave us two slots + slot_firmware_versions: vec![Some("ONE".to_string()), None], + }; + assert_eq!(inv_firmware.current_version(), Some("ONE")); + assert_eq!(inv_firmware.next_version(), None); + + // Verify active_slot and next_active_slot below 1 or above 7 results + // in `None` + for i in [0u8, 8] { + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: i.into(), + next_active_slot: Some(i.into()), + number_of_slots: 7.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![ + Some("ONE".to_string()), + Some("TWO".to_string()), + Some("THREE".to_string()), + Some("FOUR".to_string()), + Some("FIVE".to_string()), + Some("SIX".to_string()), + Some("SEVEN".to_string()), + // Invalid in the spec but we are testing that the active + // and next active slot can't reach this value + Some("EIGHT".to_string()), + ], + }; + + assert_eq!(inv_firmware.current_version(), None); + assert_eq!(inv_firmware.next_version(), None); + } + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 8f137a7bbf..ee3691c985 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1432,6 +1432,20 @@ table! { } } +table! 
{ + inv_nvme_disk_firmware (inv_collection_id, sled_id, slot) { + inv_collection_id -> Uuid, + sled_id -> Uuid, + slot -> Int8, + + number_of_slots -> Int2, + active_slot -> Int2, + next_active_slot -> Nullable, + slot1_is_read_only -> Bool, + slot_firmware_versions -> Array>, + } +} + table! { inv_zpool (inv_collection_id, sled_id, id) { inv_collection_id -> Uuid, @@ -1865,6 +1879,7 @@ allow_tables_to_appear_in_same_query!( network_interface, instance_network_interface, inv_physical_disk, + inv_nvme_disk_firmware, service_network_interface, oximeter, physical_disk, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index bb5fc294d4..52b6bea407 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(98, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(99, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(99, "inventory-nvme-firmware"), KnownVersion::new(98, "oximeter-add-time-expunged"), KnownVersion::new(97, "lookup-region-snapshot-by-region-id"), KnownVersion::new(96, "inv-dataset"), diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index dac6421bf2..b8d30df0c6 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -39,6 +39,7 @@ use nexus_db_model::InvCaboose; use nexus_db_model::InvCollection; use nexus_db_model::InvCollectionError; use nexus_db_model::InvDataset; +use nexus_db_model::InvNvmeDiskFirmware; use nexus_db_model::InvOmicronZone; use nexus_db_model::InvOmicronZoneNic; use nexus_db_model::InvPhysicalDisk; @@ -61,6 +62,7 @@ use nexus_db_model::SwCaboose; use nexus_db_model::SwRotPage; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; +use nexus_types::inventory::PhysicalDiskFirmware; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; use omicron_common::api::external::LookupType; @@ -135,6 +137,25 @@ impl DataStore { )) }) .collect::, Error>>()?; + + // Pull disk firmware out of sled agents + let mut nvme_disk_firmware = Vec::new(); + for (sled_id, sled_agent) in &collection.sled_agents { + for disk in &sled_agent.disks { + match &disk.firmware { + PhysicalDiskFirmware::Unknown => (), + PhysicalDiskFirmware::Nvme(firmware) => { + nvme_disk_firmware.push(InvNvmeDiskFirmware::new( + collection_id, + *sled_id, + disk.slot, + firmware, + )); + } + } + } + } + // Pull disks out of all sled agents let disks: Vec<_> = collection .sled_agents @@ -738,6 +759,27 @@ impl DataStore { } } + // Insert rows for all the physical disk firmware we found. 
+ { + use db::schema::inv_nvme_disk_firmware::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut nvme_disk_firmware = nvme_disk_firmware.into_iter(); + loop { + let some_disk_firmware = nvme_disk_firmware + .by_ref() + .take(batch_size) + .collect::>(); + if some_disk_firmware.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_nvme_disk_firmware) + .values(some_disk_firmware) + .execute_async(&conn) + .await?; + } + } + // Insert rows for all the zpools we found. { use db::schema::inv_zpool::dsl; @@ -1168,6 +1210,7 @@ impl DataStore { nrot_pages, nsled_agents, nphysical_disks, + nnvme_disk_disk_firmware, nsled_agent_zones, nzones, nnics, @@ -1252,6 +1295,17 @@ impl DataStore { .await? }; + // Remove rows for NVMe physical disk firmware found. + let nnvme_disk_firwmare = + { + use db::schema::inv_nvme_disk_firmware::dsl; + diesel::delete(dsl::inv_nvme_disk_firmware.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? 
+ }; + // Remove rows associated with Omicron zones let nsled_agent_zones = { @@ -1312,6 +1366,7 @@ impl DataStore { nrot_pages, nsled_agents, nphysical_disks, + nnvme_disk_firwmare, nsled_agent_zones, nzones, nnics, @@ -1336,6 +1391,7 @@ impl DataStore { "nrot_pages" => nrot_pages, "nsled_agents" => nsled_agents, "nphysical_disks" => nphysical_disks, + "nnvme_disk_firmware" => nnvme_disk_disk_firmware, "nsled_agent_zones" => nsled_agent_zones, "nzones" => nzones, "nnics" => nnics, @@ -1564,15 +1620,69 @@ impl DataStore { rows }; + // Mapping of "Sled ID" -> "Mapping of physical slot -> disk firmware" + let disk_firmware: BTreeMap< + SledUuid, + BTreeMap, + > = { + use db::schema::inv_nvme_disk_firmware::dsl; + + let mut disk_firmware = BTreeMap::< + SledUuid, + BTreeMap, + >::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated_multicolumn( + dsl::inv_nvme_disk_firmware, + (dsl::sled_id, dsl::slot), + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvNvmeDiskFirmware::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = + p.found_batch(&batch, &|row| (row.sled_id, row.slot)); + for firmware in batch { + disk_firmware + .entry(firmware.sled_id.into()) + .or_default() + .insert( + firmware.slot, + nexus_types::inventory::PhysicalDiskFirmware::Nvme( + nexus_types::inventory::NvmeFirmware { + active_slot: firmware.active_slot.into(), + next_active_slot: firmware + .next_active_slot + .map(|nas| nas.into()), + number_of_slots: firmware + .number_of_slots + .into(), + slot1_is_read_only: firmware + .slot1_is_read_only, + slot_firmware_versions: firmware + .slot_firmware_versions, + }, + ), + ); + } + } + disk_firmware + }; + // Mapping of "Sled ID" -> "All disks reported by that sled" let physical_disks: BTreeMap< - Uuid, + SledUuid, Vec, > = { use db::schema::inv_physical_disk::dsl; let 
mut disks = BTreeMap::< - Uuid, + SledUuid, Vec, >::new(); let mut paginator = Paginator::new(batch_size); @@ -1592,10 +1702,24 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| (row.sled_id, row.slot)); for disk in batch { - disks - .entry(disk.sled_id.into_untyped_uuid()) - .or_default() - .push(disk.into()); + let sled_id = disk.sled_id.into(); + let firmware = disk_firmware + .get(&sled_id) + .and_then(|lookup| lookup.get(&disk.slot)) + .unwrap_or(&nexus_types::inventory::PhysicalDiskFirmware::Unknown); + + disks.entry(sled_id).or_default().push( + nexus_types::inventory::PhysicalDisk { + identity: omicron_common::disk::DiskIdentity { + vendor: disk.vendor, + model: disk.model, + serial: disk.serial, + }, + variant: disk.variant.into(), + slot: disk.slot, + firmware: firmware.clone(), + }, + ); } } disks @@ -1764,7 +1888,7 @@ impl DataStore { usable_physical_ram: s.usable_physical_ram.into(), reservoir_size: s.reservoir_size.into(), disks: physical_disks - .get(sled_id.as_untyped_uuid()) + .get(&sled_id) .map(|disks| disks.to_vec()) .unwrap_or_default(), zpools: zpools @@ -2772,4 +2896,215 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + /// While other tests mostly exercise inserting [`PhysicalDisk`]s with + /// NVMe firmware we want additional checks on some constraints that the + /// `inv_nvme_disk_firmware` table sets. + #[tokio::test] + async fn test_nvme_firmware_inventory_insert() { + use nexus_types::external_api::params::PhysicalDiskKind; + use nexus_types::inventory::{ + Collection, NvmeFirmware, PhysicalDisk, PhysicalDiskFirmware, + }; + + fn add_disk_with_firmware_to_collection( + active_slot: u8, + next_active_slot: Option, + number_of_slots: u8, + slot1_is_read_only: bool, + slot_firmware_versions: Vec>, + ) -> Collection { + let Representative { builder, .. 
} = representative(); + let mut collection = builder.build(); + + let disk = PhysicalDisk { + identity: omicron_common::disk::DiskIdentity { + vendor: "mike".to_string(), + model: "buffalo".to_string(), + serial: "716".to_string(), + }, + variant: PhysicalDiskKind::U2, + + slot: 1, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot, + next_active_slot, + number_of_slots, + slot1_is_read_only, + slot_firmware_versions, + }), + }; + + // Remove all disks on the first sled-agent and replace it with our + // new physical disk. + std::mem::swap(&mut collection + .sled_agents + .first_entry() + .expect("collection built from representative should have a sled agent") + .into_mut() + .disks, + &mut vec![disk]); + + collection + } + + let logctx = dev::test_setup_log("nvme_firmware_inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Verify we don't allow firmware with no firmware slots + let collection = add_disk_with_firmware_to_collection( + 1, + None, + 0, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with 0 slots"); + + // Verify we don't allow firmware with too many slots + let collection = add_disk_with_firmware_to_collection( + 1, + None, + 8, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with 8 slots"); + + // Verify we don't allow an active_slot of 0 + let collection = add_disk_with_firmware_to_collection( + 0, + None, + 1, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with active_slot 0"); + + // Verify we don't allow an active_slot larger than 7 + let collection = add_disk_with_firmware_to_collection( + 8, + None, + 1, + 
true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with active_slot 8"); + + // Verify we don't allow a next_active_slot of 0 + let collection = add_disk_with_firmware_to_collection( + 1, + Some(0), + 1, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with next_active_slot 0"); + + // Verify we don't allow a next_active_slot larger than 7 + let collection = add_disk_with_firmware_to_collection( + 1, + Some(8), + 1, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with next_active_slot 8"); + + // XXX omicron#6514 + // - We would like to deny insertion when a firmware version string is + // larger than the NVMe spec would allow but we have discovered that our + // version of CRDB will silently truncate a string to size bound rather + // than erroring. + // - The NVMe spec defines the firmware versions as an array of 7 + // entries made up of 8 characters (ascii data) so unless the device + // firmware has a bug or the rust libnvme binding has a bug the worst we + // should see is the truncated value. + // - The CRDB STRING type is checking unicode length rather than byte + // length. However, CRDB won't let us use the BYTES type with an ARRAY. 
+ let collection = add_disk_with_firmware_to_collection( + 1, + None, + 1, + true, + vec![Some("FIRMWARETOOLARGE".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("firmware inserted modulo version string truncation"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection.id) + .await + .expect("failed to read collection back"); + let (_, value) = &collection_read + .sled_agents + .first_key_value() + .expect("we have a sled-agent in the collection"); + let disk = + value.disks.first().expect("sled-agent has the disk we inserted"); + let PhysicalDiskFirmware::Nvme(ref firmware) = disk.firmware else { + panic!( + "Didn't find the expected NVMe firmware that we just inserted" + ) + }; + assert_eq!( + firmware.slot_firmware_versions, + vec![Some("FIRMWARE".to_string())], + "Firmware versions contains the truncated string" + ); + + // Finally lets actually insert a valid configuration and assert that we + // can pull it back out correctly. + // + // - 3 slots + // - 1st slot is active + // - 1st slot is marked read-only + // - 2nd slot is marked active on next reset + // - 3rd slot has no firmware uploaded into it + let collection = add_disk_with_firmware_to_collection( + 1, + Some(2), + 3, + true, + vec![Some("FOO".to_string()), Some("BAR".to_string()), None], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("firmware inserted"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection.id) + .await + .expect("failed to read collection back"); + assert_eq!(collection, collection_read); + + // NOTE that there is no validation that the bounds checked values + // in the database make sense. For example a device that reports + // a next_active_slot of 2 but only has a value of 1 in the + // number_of_slots field. These configurations are guarded against by + // the `InvNvmeDiskFirmware` type itself. 
+ + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 8162b11f2d..ae31ade02d 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -712,6 +712,41 @@ mod test { }, variant: DiskVariant::U2, slot, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("TEST1".to_string())], + } + } + + // helper function to easily convert from [`InventoryDisk`] to + // [`nexus_types::inventory::PhysicalDisk`] + fn inv_phys_disk_to_nexus_phys_disk( + inv_phys_disk: &InvPhysicalDisk, + ) -> nexus_types::inventory::PhysicalDisk { + use nexus_types::inventory::NvmeFirmware; + use nexus_types::inventory::PhysicalDiskFirmware; + + // We reuse `create_inv_disk` so that we get the same NVMe firmware + // values throughout the tests. + let inv_disk = create_inv_disk("unused".to_string(), 0); + + nexus_types::inventory::PhysicalDisk { + identity: DiskIdentity { + vendor: inv_phys_disk.vendor.clone(), + model: inv_phys_disk.model.clone(), + serial: inv_phys_disk.serial.clone(), + }, + variant: inv_phys_disk.variant.into(), + slot: inv_phys_disk.slot, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot: inv_disk.active_firmware_slot, + next_active_slot: inv_disk.next_active_firmware_slot, + number_of_slots: inv_disk.number_of_firmware_slots, + slot1_is_read_only: inv_disk.slot1_is_read_only, + slot_firmware_versions: inv_disk.slot_firmware_versions, + }), } } @@ -829,7 +864,10 @@ mod test { // Normalize the data a bit -- convert to nexus types, and sort vecs for // stability in the comparison. 
let mut uninitialized_disks: Vec = - uninitialized_disks.into_iter().map(|d| d.into()).collect(); + uninitialized_disks + .into_iter() + .map(|d| inv_phys_disk_to_nexus_phys_disk(&d)) + .collect(); uninitialized_disks .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); let mut expected_disks: Vec = @@ -879,7 +917,10 @@ mod test { // uninitialized) and the last two disks of "disks_b" (of which both are // still uninitialized). let mut uninitialized_disks: Vec = - uninitialized_disks.into_iter().map(|d| d.into()).collect(); + uninitialized_disks + .into_iter() + .map(|d| inv_phys_disk_to_nexus_phys_disk(&d)) + .collect(); uninitialized_disks .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); let mut expected_disks: Vec = diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 626f077deb..8d7ce30411 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -280,6 +280,7 @@ pub fn representative() -> Representative { // This first one will match "sled1_bb"'s baseboard information. let sled_agent_id_basic = "c5aec1df-b897-49e4-8085-ccd975f9b529".parse().unwrap(); + // Add some disks to this first sled. // Add some disks to this first sled. let disks = vec![ // Let's say we have one manufacturer for our M.2... InventoryDisk { identity: omicron_common::disk::DiskIdentity { vendor: "macrohard".to_string(), serial: "serial1".to_string(), model: "box".to_string(), }, variant: DiskVariant::M2, slot: 0, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, // ... 
and a couple different vendors for our U.2s InventoryDisk { @@ -301,6 +307,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 1, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, InventoryDisk { identity: omicron_common::disk::DiskIdentity { @@ -310,6 +321,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 2, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, InventoryDisk { identity: omicron_common::disk::DiskIdentity { @@ -319,6 +335,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 3, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, ]; let zpools = vec![]; diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 538c7bec1b..6b282499a9 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -562,6 +562,13 @@ impl Sled { identity: d.disk_identity.clone(), variant: DiskVariant::U2, slot: i64::try_from(i).unwrap(), + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some( + "SIMUL1".to_string(), + )], }) .collect(), // Zpools & Datasets won't necessarily show up until our first diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index f9ad9a5cf0..563fd8a4d3 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -377,6 +377,23 @@ impl IntoRotPage for gateway_client::types::RotCfpa { } } +/// Firmware reported for a physical 
NVMe disk. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct NvmeFirmware { + pub active_slot: u8, + pub next_active_slot: Option, + pub number_of_slots: u8, + pub slot1_is_read_only: bool, + pub slot_firmware_versions: Vec>, +} + +/// Firmware reported by sled agent for a particular disk format. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub enum PhysicalDiskFirmware { + Unknown, + Nvme(NvmeFirmware), +} + /// A physical disk reported by a sled agent. /// /// This identifies that a physical disk appears in a Sled. @@ -391,6 +408,7 @@ pub struct PhysicalDisk { pub identity: omicron_common::disk::DiskIdentity, pub variant: PhysicalDiskKind, pub slot: i64, + pub firmware: PhysicalDiskFirmware, } impl From for PhysicalDisk { @@ -399,6 +417,13 @@ impl From for PhysicalDisk { identity: disk.identity, variant: disk.variant.into(), slot: disk.slot, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot: disk.active_firmware_slot, + next_active_slot: disk.next_active_firmware_slot, + number_of_slots: disk.number_of_firmware_slots, + slot1_is_read_only: disk.slot1_is_read_only, + slot_firmware_versions: disk.slot_firmware_versions, + }), } } } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 39fe9dd3e0..3cc53d3adb 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3463,20 +3463,50 @@ "description": "Identifies information about disks which may be attached to Sleds.", "type": "object", "properties": { + "active_firmware_slot": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "identity": { "$ref": "#/components/schemas/DiskIdentity" }, + "next_active_firmware_slot": { + "nullable": true, + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "number_of_firmware_slots": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "slot": { "type": "integer", "format": "int64" }, + "slot1_is_read_only": { + "type": "boolean" + }, + 
"slot_firmware_versions": { + "type": "array", + "items": { + "nullable": true, + "type": "string" + } + }, "variant": { "$ref": "#/components/schemas/DiskVariant" } }, "required": [ + "active_firmware_slot", "identity", + "number_of_firmware_slots", "slot", + "slot1_is_read_only", + "slot_firmware_versions", "variant" ] }, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index e26626818d..c09a1cd588 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3216,13 +3216,42 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_physical_disk ( variant omicron.public.physical_disk_kind NOT NULL, - -- FK consisting of: + -- PK consisting of: -- - Which collection this was -- - The sled reporting the disk -- - The slot in which this disk was found PRIMARY KEY (inv_collection_id, sled_id, slot) ); +CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + -- The slot where this disk was last observed + slot INT8 CHECK (slot >= 0) NOT NULL, + + -- total number of firmware slots the device has + number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + active_slot INT2 CHECK (active_slot BETWEEN 1 AND 7) NOT NULL, + -- staged firmware slot to be active on reset + next_active_slot INT2 CHECK (next_active_slot BETWEEN 1 AND 7), + -- slot1 is distinct in the NVMe spec in the sense that it can be read only + slot1_is_read_only BOOLEAN, + -- the firmware version string for each NVMe slot (0 indexed), a NULL means the + -- slot exists but is empty + slot_firmware_versions STRING(8)[] CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + + -- PK consisting of: + -- - Which collection this was + -- - The sled reporting the disk + 
-- - The slot in which the disk was found + PRIMARY KEY (inv_collection_id, sled_id, slot) +); + CREATE TABLE IF NOT EXISTS omicron.public.inv_zpool ( -- where this observation came from -- (foreign key into `inv_collection` table) @@ -4299,7 +4328,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '98.0.0', NULL) + (TRUE, NOW(), NOW(), '99.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/inventory-nvme-firmware/up01.sql b/schema/crdb/inventory-nvme-firmware/up01.sql new file mode 100644 index 0000000000..0191fd31f1 --- /dev/null +++ b/schema/crdb/inventory-nvme-firmware/up01.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( + inv_collection_id UUID NOT NULL, + + sled_id UUID NOT NULL, + slot INT8 CHECK (slot >= 0) NOT NULL, + + number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + active_slot INT2 CHECK (active_slot BETWEEN 1 AND 7) NOT NULL, + next_active_slot INT2 CHECK (next_active_slot BETWEEN 1 AND 7), + slot1_is_read_only BOOLEAN, + slot_firmware_versions STRING(8)[] CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + + PRIMARY KEY (inv_collection_id, sled_id, slot) +); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1c1f3cb0b3..279253d323 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1626,6 +1626,11 @@ mod test { }, variant: DiskVariant::U2, slot: i.try_into().unwrap(), + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("TEST1".to_string())], }) .collect(), zpools: vec![], diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 3a1ece4439..957965c6a0 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -845,6 +845,11 @@ impl SledAgent { identity: 
info.identity.clone(), variant: info.variant, slot: info.slot, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("SIMUL1".to_string())], }) .collect(), zpools: storage diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e7fcdfc327..7f4cc4fda6 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1221,11 +1221,16 @@ impl SledAgent { let mut zpools = vec![]; let mut datasets = vec![]; let all_disks = self.storage().get_latest_disks().await; - for (identity, variant, slot, _firmware) in all_disks.iter_all() { + for (identity, variant, slot, firmware) in all_disks.iter_all() { disks.push(InventoryDisk { identity: identity.clone(), variant, slot, + active_firmware_slot: firmware.active_slot(), + next_active_firmware_slot: firmware.next_active_slot(), + number_of_firmware_slots: firmware.number_of_slots(), + slot1_is_read_only: firmware.slot1_read_only(), + slot_firmware_versions: firmware.slots().to_vec(), }); } for zpool in all_disks.all_u2_zpools() { diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index f95efb985e..a07ddea8cf 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -138,6 +138,7 @@ pub struct DiskFirmware { active_slot: u8, next_active_slot: Option, slot1_read_only: bool, + number_of_slots: u8, // NB: This vec is 0 indexed while active_slot and next_active_slot are // referring to "slots" in terms of the NVMe spec which defines slots 1-7. 
// If the active_slot is 1, then it will be slot_firmware_versions[0] in the @@ -158,6 +159,10 @@ impl DiskFirmware { self.slot1_read_only } + pub fn number_of_slots(&self) -> u8 { + self.number_of_slots + } + pub fn slots(&self) -> &[Option<String>] { self.slot_firmware_versions.as_slice() } @@ -168,12 +173,14 @@ impl DiskFirmware { active_slot: u8, next_active_slot: Option<u8>, slot1_read_only: bool, + number_of_slots: u8, slots: Vec<Option<String>>, ) -> Self { Self { active_slot, next_active_slot, slot1_read_only, + number_of_slots, slot_firmware_versions: slots, } } diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 83ef685b67..3664a7cf06 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -576,6 +576,7 @@ fn poll_blkdev_node( firmware_log_page.active_slot, firmware_log_page.next_active_slot, firmware_log_page.slot1_is_read_only, + firmware_log_page.number_of_slots, firmware_log_page.slot_iter().map(|s| s.map(str::to_string)).collect(), ); diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 51980bea3a..ae0f5d25dc 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -44,8 +44,8 @@ const SYNTHETIC_SLOT_OFFSET: i64 = 1024; // A generic name for the firmware in slot1 of an NVMe device. // -// bhyve for example uses "1.0" and marks slot1 as read-only. +// bhyve for example uses "1.0" and marks slot1 as read-only (must be within 8 chars).
+const SYNTHETIC_FIRMWARE_SLOT1: &str = "SYNTH1"; impl SyntheticDisk { // "Manages" a SyntheticDisk by ensuring that it has a Zpool and importing @@ -144,6 +144,7 @@ impl RawSyntheticDisk { 1, None, true, + 1, vec![Some(SYNTHETIC_FIRMWARE_SLOT1.to_string())], ); diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 21d54b4176..57ceb7d63d 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -1283,6 +1283,7 @@ mod tests { disk.firmware.active_slot(), disk.firmware.next_active_slot(), disk.firmware.slot1_read_only(), + disk.firmware.number_of_slots(), slots, ); }