From f38eeb9a0900f6bc24cd979f2839c7b2ea8b986b Mon Sep 17 00:00:00 2001
From: Mike Zeller
Date: Thu, 11 Jul 2024 20:35:31 +0000
Subject: [PATCH 1/8] Store NVMe firmware in inventory collection

---
 nexus-sled-agent-shared/src/inventory.rs      |   8 ++
 nexus/db-model/src/inventory.rs               |  55 ++++++---
 nexus/db-model/src/schema.rs                  |  15 +++
 .../db-queries/src/db/datastore/inventory.rs  | 115 +++++++++++++++++-
 .../src/db/datastore/physical_disk.rs         |  45 ++++++-
 nexus/inventory/src/examples.rs               |  21 ++++
 nexus/reconfigurator/planning/src/system.rs   |   7 ++
 nexus/types/src/inventory.rs                  |  25 ++++
 openapi/sled-agent.json                       |  30 +++++
 schema/crdb/dbinit.sql                        |  31 ++++-
 sled-agent/src/rack_setup/service.rs          |   5 +
 sled-agent/src/sim/sled_agent.rs              |   5 +
 sled-agent/src/sled_agent.rs                  |   7 +-
 sled-hardware/src/disk.rs                     |   7 ++
 sled-hardware/src/illumos/mod.rs              |   1 +
 sled-storage/src/disk.rs                      |   5 +-
 sled-storage/src/manager.rs                   |   1 +
 17 files changed, 358 insertions(+), 25 deletions(-)

diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs
index 8a793f61501..b59e1fdf504 100644
--- a/nexus-sled-agent-shared/src/inventory.rs
+++ b/nexus-sled-agent-shared/src/inventory.rs
@@ -28,6 +28,14 @@ pub struct InventoryDisk {
     pub identity: omicron_common::disk::DiskIdentity,
     pub variant: DiskVariant,
     pub slot: i64,
+    // Today we only have NVMe disks so we embedded the firmware metadata here.
+    // In the future we can track firmware metadata in a unique type if we
+    // support more than one disk format.
+    pub active_firmware_slot: u8,
+    pub next_active_firmware_slot: Option<u8>,
+    pub number_of_firmware_slots: u8,
+    pub slot1_is_read_only: bool,
+    pub slot_firmware_versions: Vec<Option<String>>,
 }
 
 /// Identifies information about zpools managed by the control plane
diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs
index 400acc68b39..fdcf20c853d 100644
--- a/nexus/db-model/src/inventory.rs
+++ b/nexus/db-model/src/inventory.rs
@@ -7,10 +7,10 @@ use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic};
 use crate::schema::{
     hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error,
-    inv_omicron_zone, inv_omicron_zone_nic, inv_physical_disk,
-    inv_root_of_trust, inv_root_of_trust_page, inv_service_processor,
-    inv_sled_agent, inv_sled_omicron_zones, inv_zpool, sw_caboose,
-    sw_root_of_trust_page,
+    inv_nvme_disk_firmware, inv_omicron_zone, inv_omicron_zone_nic,
+    inv_physical_disk, inv_root_of_trust, inv_root_of_trust_page,
+    inv_service_processor, inv_sled_agent, inv_sled_omicron_zones, inv_zpool,
+    sw_caboose, sw_root_of_trust_page,
 };
 use crate::typed_uuid::DbTypedUuid;
 use crate::PhysicalDiskKind;
@@ -32,7 +32,8 @@ use nexus_sled_agent_shared::inventory::{
     OmicronZoneConfig, OmicronZonesConfig,
 };
 use nexus_types::inventory::{
-    BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot,
+    BaseboardId, Caboose, Collection, PhysicalDiskFirmware, PowerState,
+    RotPage, RotSlot,
 };
 use omicron_common::api::internal::shared::NetworkInterface;
 use omicron_uuid_kinds::CollectionKind;
@@ -881,16 +882,40 @@ impl InvPhysicalDisk {
     }
 }
 
-impl From<InvPhysicalDisk> for nexus_types::inventory::PhysicalDisk {
-    fn from(disk: InvPhysicalDisk) -> Self {
-        Self {
-            identity: omicron_common::disk::DiskIdentity {
-                vendor: disk.vendor,
-                serial: disk.serial,
-                model: disk.model,
-            },
-            variant: disk.variant.into(),
-            slot: disk.slot,
+/// See [`nexus_types::inventory::PhysicalDiskFirmware::Nvme`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = inv_nvme_disk_firmware)]
+pub struct InvNvmeDiskFirmware {
+    pub inv_collection_id: DbTypedUuid<CollectionKind>,
+    pub sled_id: DbTypedUuid<SledKind>,
+    pub slot: i64,
+    pub active_slot: SqlU8,
+    pub next_active_slot: Option<SqlU8>,
+    pub number_of_slots: SqlU8,
+    pub slot1_is_read_only: bool,
+    pub slot_firmware_versions: Vec<Option<String>>,
+}
+
+impl InvNvmeDiskFirmware {
+    pub fn try_from_physical_disk(
+        inv_collection_id: CollectionUuid,
+        sled_id: SledUuid,
+        disk: nexus_types::inventory::PhysicalDisk,
+    ) -> Option<Self> {
+        match disk.firmware {
+            PhysicalDiskFirmware::Unknown => None,
+            PhysicalDiskFirmware::Nvme(firmware) => Some(Self {
+                inv_collection_id: inv_collection_id.into(),
+                sled_id: sled_id.into(),
+                slot: disk.slot,
+                active_slot: firmware.active_slot.into(),
+                next_active_slot: firmware
+                    .next_active_slot
+                    .map(|nas| nas.into()),
+                number_of_slots: firmware.number_of_slots.into(),
+                slot1_is_read_only: firmware.slot1_is_read_only,
+                slot_firmware_versions: firmware.slot_firmware_versions,
+            }),
         }
     }
 }
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 845da13a446..4da879fa799 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -1436,6 +1436,20 @@ table! {
     }
 }
 
+table! {
+    inv_nvme_disk_firmware (inv_collection_id, sled_id, slot) {
+        inv_collection_id -> Uuid,
+        sled_id -> Uuid,
+        slot -> Int8,
+
+        number_of_slots -> Int2,
+        active_slot -> Int2,
+        next_active_slot -> Nullable<Int2>,
+        slot1_is_read_only -> Bool,
+        slot_firmware_versions -> Array<Nullable<Text>>,
+    }
+}
+
 table! {
     inv_zpool (inv_collection_id, sled_id, id) {
         inv_collection_id -> Uuid,
         sled_id -> Uuid,
@@ -1854,6 +1868,7 @@ allow_tables_to_appear_in_same_query!(
     network_interface,
     instance_network_interface,
     inv_physical_disk,
+    inv_nvme_disk_firmware,
     service_network_interface,
     oximeter,
     physical_disk,
diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs
index 1774a25c488..d72a480f076 100644
--- a/nexus/db-queries/src/db/datastore/inventory.rs
+++ b/nexus/db-queries/src/db/datastore/inventory.rs
@@ -38,6 +38,7 @@ use nexus_db_model::HwRotSlotEnum;
 use nexus_db_model::InvCaboose;
 use nexus_db_model::InvCollection;
 use nexus_db_model::InvCollectionError;
+use nexus_db_model::InvNvmeDiskFirmware;
 use nexus_db_model::InvOmicronZone;
 use nexus_db_model::InvOmicronZoneNic;
 use nexus_db_model::InvPhysicalDisk;
@@ -134,6 +135,23 @@
                 ))
             })
             .collect::<Result<Vec<_>, Error>>()?;
+
+        // Pull disk firmware out of sled agents
+        let disk_firmware: Vec<_> = collection
+            .sled_agents
+            .iter()
+            .flat_map(|(sled_id, sled_agent)| {
+                sled_agent.disks.iter().map(|disk| {
+                    InvNvmeDiskFirmware::try_from_physical_disk(
+                        collection_id,
+                        *sled_id,
+                        disk.clone(),
+                    )
+                })
+            })
+            .flatten()
+            .collect();
+
         // Pull disks out of all sled agents
         let disks: Vec<_> = collection
             .sled_agents
@@ -726,6 +744,27 @@
             }
         }
 
+        // Insert rows for all the physical disk firmware we found.
+        {
+            use db::schema::inv_nvme_disk_firmware::dsl;
+
+            let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap();
+            let mut disk_firmware = disk_firmware.into_iter();
+            loop {
+                let some_disk_firmware = disk_firmware
+                    .by_ref()
+                    .take(batch_size)
+                    .collect::<Vec<_>>();
+                if some_disk_firmware.is_empty() {
+                    break;
+                }
+                let _ = diesel::insert_into(dsl::inv_nvme_disk_firmware)
+                    .values(some_disk_firmware)
+                    .execute_async(&conn)
+                    .await?;
+            }
+        }
+
         // Insert rows for all the zpools we found.
{ use db::schema::inv_zpool::dsl; @@ -1533,6 +1572,60 @@ impl DataStore { rows }; + // Mapping of "Sled ID" -> "Mapping of physical slot -> disk firmware" + let disk_firmware: BTreeMap< + Uuid, + BTreeMap, + > = { + use db::schema::inv_nvme_disk_firmware::dsl; + + let mut disk_firmware = BTreeMap::< + Uuid, + BTreeMap, + >::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated_multicolumn( + dsl::inv_nvme_disk_firmware, + (dsl::sled_id, dsl::slot), + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvNvmeDiskFirmware::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = + p.found_batch(&batch, &|row| (row.sled_id, row.slot)); + for firmware in batch { + disk_firmware + .entry(firmware.sled_id.into_untyped_uuid()) + .or_default() + .insert( + firmware.slot, + nexus_types::inventory::PhysicalDiskFirmware::Nvme( + nexus_types::inventory::NvmeFirmware { + active_slot: firmware.active_slot.into(), + next_active_slot: firmware + .next_active_slot + .map(|nas| nas.into()), + number_of_slots: firmware + .number_of_slots + .into(), + slot1_is_read_only: firmware + .slot1_is_read_only, + slot_firmware_versions: firmware + .slot_firmware_versions, + }, + ), + ); + } + } + disk_firmware + }; + // Mapping of "Sled ID" -> "All disks reported by that sled" let physical_disks: BTreeMap< Uuid, @@ -1561,10 +1654,24 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| (row.sled_id, row.slot)); for disk in batch { - disks - .entry(disk.sled_id.into_untyped_uuid()) - .or_default() - .push(disk.into()); + let sled_id = disk.sled_id.into_untyped_uuid(); + let firmware = disk_firmware + .get(&sled_id) + .and_then(|lookup| lookup.get(&disk.slot)) + .unwrap_or(&nexus_types::inventory::PhysicalDiskFirmware::Unknown); + + disks.entry(sled_id).or_default().push( + nexus_types::inventory::PhysicalDisk { + identity: omicron_common::disk::DiskIdentity { + vendor: disk.vendor, + model: disk.model, + serial: disk.serial, + }, + variant: disk.variant.into(), + slot: disk.slot, + firmware: firmware.clone(), + }, + ); } } disks diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index dc26e093c00..7dfcd58ce47 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -711,6 +711,41 @@ mod test { }, variant: DiskVariant::U2, slot, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("TEST1".to_string())], + } + } + + // helper function to eaisly convert from [`InventoryDisk`] to + // [`nexus_types::inventory::PhysicalDisk`] + fn inv_phys_disk_to_nexus_phys_disk( + inv_phys_disk: &InvPhysicalDisk, + ) -> nexus_types::inventory::PhysicalDisk { + use nexus_types::inventory::NvmeFirmware; + use nexus_types::inventory::PhysicalDiskFirmware; + + // We reuse `create_inv_disk` so that we get the same NVMe firmware + // values throughout the tests. 
+ let inv_disk = create_inv_disk("unused".to_string(), 0); + + nexus_types::inventory::PhysicalDisk { + identity: DiskIdentity { + vendor: inv_phys_disk.vendor.clone(), + model: inv_phys_disk.model.clone(), + serial: inv_phys_disk.serial.clone(), + }, + variant: inv_phys_disk.variant.into(), + slot: inv_phys_disk.slot, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot: inv_disk.active_firmware_slot, + next_active_slot: inv_disk.next_active_firmware_slot, + number_of_slots: inv_disk.number_of_firmware_slots, + slot1_is_read_only: inv_disk.slot1_is_read_only, + slot_firmware_versions: inv_disk.slot_firmware_versions, + }), } } @@ -828,7 +863,10 @@ mod test { // Normalize the data a bit -- convert to nexus types, and sort vecs for // stability in the comparison. let mut uninitialized_disks: Vec = - uninitialized_disks.into_iter().map(|d| d.into()).collect(); + uninitialized_disks + .into_iter() + .map(|d| inv_phys_disk_to_nexus_phys_disk(&d)) + .collect(); uninitialized_disks .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); let mut expected_disks: Vec = @@ -878,7 +916,10 @@ mod test { // uninitailized) and the last two disks of "disks_b" (of which both are // still uninitialized). let mut uninitialized_disks: Vec = - uninitialized_disks.into_iter().map(|d| d.into()).collect(); + uninitialized_disks + .into_iter() + .map(|d| inv_phys_disk_to_nexus_phys_disk(&d)) + .collect(); uninitialized_disks .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); let mut expected_disks: Vec = diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 2dade3d34f1..da2091c4cc6 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -279,6 +279,7 @@ pub fn representative() -> Representative { // This first one will match "sled1_bb"'s baseboard information. let sled_agent_id_basic = "c5aec1df-b897-49e4-8085-ccd975f9b529".parse().unwrap(); + // Add some disks to this first sled. let disks = vec![ // Let's say we have one manufacturer for our M.2... @@ -290,6 +291,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::M2, slot: 0, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, // ... 
and a couple different vendors for our U.2s InventoryDisk { @@ -300,6 +306,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 1, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, InventoryDisk { identity: omicron_common::disk::DiskIdentity { @@ -309,6 +320,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 2, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, InventoryDisk { identity: omicron_common::disk::DiskIdentity { @@ -318,6 +334,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 3, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, ]; let zpools = vec![]; diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index f6989be9eff..9faf14bdc08 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -556,6 +556,13 @@ impl Sled { identity: d.disk_identity.clone(), variant: DiskVariant::U2, slot: i64::try_from(i).unwrap(), + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some( + "SIMUL1".to_string(), + )], }) .collect(), // Zpools won't necessarily show up until our first request diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 0ec2f6fbdbf..717a0cbde4b 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -356,6 +356,23 @@ impl IntoRotPage for gateway_client::types::RotCfpa { } } +/// Firmware reported for a physical NVMe disk. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct NvmeFirmware { + pub active_slot: u8, + pub next_active_slot: Option, + pub number_of_slots: u8, + pub slot1_is_read_only: bool, + pub slot_firmware_versions: Vec>, +} + +/// Firmware reported by sled agent for a particular disk format. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub enum PhysicalDiskFirmware { + Unknown, + Nvme(NvmeFirmware), +} + /// A physical disk reported by a sled agent. /// /// This identifies that a physical disk appears in a Sled. 
@@ -370,6 +387,7 @@ pub struct PhysicalDisk { pub identity: omicron_common::disk::DiskIdentity, pub variant: PhysicalDiskKind, pub slot: i64, + pub firmware: PhysicalDiskFirmware, } impl From for PhysicalDisk { @@ -378,6 +396,13 @@ impl From for PhysicalDisk { identity: disk.identity, variant: disk.variant.into(), slot: disk.slot, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot: disk.active_firmware_slot, + next_active_slot: disk.next_active_firmware_slot, + number_of_slots: disk.number_of_firmware_slots, + slot1_is_read_only: disk.slot1_is_read_only, + slot_firmware_versions: disk.slot_firmware_versions, + }), } } } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ecaff330428..5ffd3d4eded 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3284,20 +3284,50 @@ "description": "Identifies information about disks which may be attached to Sleds.", "type": "object", "properties": { + "active_firmware_slot": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "identity": { "$ref": "#/components/schemas/DiskIdentity" }, + "next_active_firmware_slot": { + "nullable": true, + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "number_of_firmware_slots": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "slot": { "type": "integer", "format": "int64" }, + "slot1_is_read_only": { + "type": "boolean" + }, + "slot_firmware_versions": { + "type": "array", + "items": { + "nullable": true, + "type": "string" + } + }, "variant": { "$ref": "#/components/schemas/DiskVariant" } }, "required": [ + "active_firmware_slot", "identity", + "number_of_firmware_slots", "slot", + "slot1_is_read_only", + "slot_firmware_versions", "variant" ] }, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 2a83f01298d..32ac9cc1ebc 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3157,13 +3157,42 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_physical_disk ( variant omicron.public.physical_disk_kind NOT NULL, - -- FK consisting of: + -- PK consisting of: -- - Which collection this was -- - The sled reporting the disk -- - The slot in which this disk was found PRIMARY KEY (inv_collection_id, sled_id, slot) ); +CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + -- The slot where this disk was last observed + slot INT8 CHECK (slot >= 0) NOT NULL, + + -- total number of firmware slots the device has + number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + -- staged firmware slot to be active on reset + next_active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7), + -- slot1 is distinct in the NVMe spec in the sense that it can be read only + slot1_is_read_only BOOLEAN, + -- the firmware version string for each NVMe slot (0 indexed), a NULL means the + -- slot exists but is empty + slot_firmware_versions STRING(8)[], + CONSTRAINT check_slot_firmware_versions_len CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + + -- PK consisting of: + -- - Which collection this was + -- - The sled reporting the disk + PRIMARY KEY (inv_collection_id, sled_id, slot) +); + CREATE TABLE IF NOT EXISTS 
omicron.public.inv_zpool ( -- where this observation came from -- (foreign key into `inv_collection` table) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index ac51912fe67..6ee22f5ba4d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1590,6 +1590,11 @@ mod test { }, variant: DiskVariant::U2, slot: i.try_into().unwrap(), + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("TEST1".to_string())], }) .collect(), zpools: vec![], diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 79d57a42e65..3f9b8d2933a 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -889,6 +889,11 @@ impl SledAgent { identity: info.identity.clone(), variant: info.variant, slot: info.slot, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("SIMUL1".to_string())], }) .collect(), zpools: storage diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d87df0d7c53..c12bcbde0a8 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1211,11 +1211,16 @@ impl SledAgent { let mut disks = vec![]; let mut zpools = vec![]; let all_disks = self.storage().get_latest_disks().await; - for (identity, variant, slot, _firmware) in all_disks.iter_all() { + for (identity, variant, slot, firmware) in all_disks.iter_all() { disks.push(InventoryDisk { identity: identity.clone(), variant, slot, + active_firmware_slot: firmware.active_slot(), + next_active_firmware_slot: firmware.next_active_slot(), + number_of_firmware_slots: firmware.number_of_slots(), + slot1_is_read_only: firmware.slot1_read_only(), + slot_firmware_versions: firmware.slots().to_vec(), }); } for zpool in all_disks.all_u2_zpools() { diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index f95efb985ef..a07ddea8cfd 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -138,6 +138,7 @@ pub struct DiskFirmware { active_slot: u8, next_active_slot: Option, slot1_read_only: bool, + number_of_slots: u8, // NB: This vec is 0 indexed while active_slot and next_active_slot are // referring to "slots" in terms of the NVMe spec which defines slots 1-7. 
// If the active_slot is 1, then it will be slot_firmware_versions[0] in the @@ -158,6 +159,10 @@ impl DiskFirmware { self.slot1_read_only } + pub fn number_of_slots(&self) -> u8 { + self.number_of_slots + } + pub fn slots(&self) -> &[Option] { self.slot_firmware_versions.as_slice() } @@ -168,12 +173,14 @@ impl DiskFirmware { active_slot: u8, next_active_slot: Option, slot1_read_only: bool, + number_of_slots: u8, slots: Vec>, ) -> Self { Self { active_slot, next_active_slot, slot1_read_only, + number_of_slots, slot_firmware_versions: slots, } } diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 83ef685b676..3664a7cf06f 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -576,6 +576,7 @@ fn poll_blkdev_node( firmware_log_page.active_slot, firmware_log_page.next_active_slot, firmware_log_page.slot1_is_read_only, + firmware_log_page.number_of_slots, firmware_log_page.slot_iter().map(|s| s.map(str::to_string)).collect(), ); diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 51980bea3a1..ae0f5d25dce 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -44,8 +44,8 @@ const SYNTHETIC_SLOT_OFFSET: i64 = 1024; // A generic name for the firmware in slot1 of an NVMe device. // -// bhyve for example uses "1.0" and marks slot1 as read-only. -const SYNTHETIC_FIRMWARE_SLOT1: &str = "synthetic 1.0"; +// bhyve for example uses "1.0" and marks slot1 as read-only (must be within 8chars). +const SYNTHETIC_FIRMWARE_SLOT1: &str = "SYNTH1"; impl SyntheticDisk { // "Manages" a SyntheticDisk by ensuring that it has a Zpool and importing @@ -144,6 +144,7 @@ impl RawSyntheticDisk { 1, None, true, + 1, vec![Some(SYNTHETIC_FIRMWARE_SLOT1.to_string())], ); diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 8168f32cea5..b30a42cb50e 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -987,6 +987,7 @@ mod tests { disk.firmware.active_slot(), disk.firmware.next_active_slot(), disk.firmware.slot1_read_only(), + disk.firmware.number_of_slots(), slots, ); } From 1a9947e2e8691fc3280184e80c4e3277fe50d5c5 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Fri, 9 Aug 2024 22:09:26 +0000 Subject: [PATCH 2/8] Update omdb command to print inventory firmware info --- dev-tools/omdb/src/bin/omdb/db.rs | 81 ++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 1030e4288b1..c946bf1fe41 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -58,6 +58,7 @@ use nexus_db_model::HwBaseboardId; use nexus_db_model::Image; use nexus_db_model::Instance; use nexus_db_model::InvCollection; +use nexus_db_model::InvNvmeDiskFirmware; use nexus_db_model::InvPhysicalDisk; use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; @@ -4315,36 +4316,90 @@ async fn cmd_db_inventory_physical_disks( model: String, serial: String, variant: String, + firmware: String, + next_firmware: String, } - use db::schema::inv_physical_disk::dsl; - let mut query = dsl::inv_physical_disk.into_boxed(); + use db::schema::inv_nvme_disk_firmware::dsl as firmware_dsl; + use db::schema::inv_physical_disk::dsl as disk_dsl; + + let mut query = disk_dsl::inv_physical_disk.into_boxed(); query = query.limit(i64::from(u32::from(limit))); if let Some(collection_id) = args.collection_id { query = query.filter( - dsl::inv_collection_id.eq(collection_id.into_untyped_uuid()), 
+ disk_dsl::inv_collection_id.eq(collection_id.into_untyped_uuid()), ); } if let Some(sled_id) = args.sled_id { - query = query.filter(dsl::sled_id.eq(sled_id.into_untyped_uuid())); + query = query.filter(disk_dsl::sled_id.eq(sled_id.into_untyped_uuid())); } let disks = query - .select(InvPhysicalDisk::as_select()) + .left_join( + firmware_dsl::inv_nvme_disk_firmware.on( + firmware_dsl::inv_collection_id + .eq(disk_dsl::inv_collection_id) + .and(firmware_dsl::sled_id.eq(disk_dsl::sled_id)) + .and(firmware_dsl::slot.eq(disk_dsl::slot)), + ), + ) + .select(( + InvPhysicalDisk::as_select(), + Option::::as_select(), + )) .load_async(&**conn) .await .context("loading physical disks")?; - let rows = disks.into_iter().map(|disk| DiskRow { - inv_collection_id: disk.inv_collection_id.into_untyped_uuid(), - sled_id: disk.sled_id.into_untyped_uuid(), - slot: disk.slot, - vendor: disk.vendor, - model: disk.model.clone(), - serial: disk.serial.clone(), - variant: format!("{:?}", disk.variant), + let rows = disks.into_iter().map(|(disk, firmware)| { + let mut active_firmware = "UNKNOWN".to_string(); + let mut next_firmware = String::new(); + + 'firmware: { + // Ensure we have matching firmware information for a physical disk. + let Some(firmware) = firmware else { + break 'firmware; + }; + + // Attempt to read the fw version for the active slot. + if let Some(slot_info) = &firmware + .slot_firmware_versions + .get(firmware.active_slot.0 as usize - 1) + { + if let Some(fw_ver) = slot_info { + active_firmware = fw_ver.clone(); + } + }; + + // If we don't have firmware staged for next reset break early. + let Some(next_slot) = firmware.next_active_slot else { + break 'firmware; + }; + + // We have firmware staged for the next reset so attempt to read + // the fw version. + if let Some(slot_info) = + &firmware.slot_firmware_versions.get(next_slot.0 as usize - 1) + { + if let Some(fw_ver) = slot_info { + next_firmware = fw_ver.clone(); + } + } + } + + DiskRow { + inv_collection_id: disk.inv_collection_id.into_untyped_uuid(), + sled_id: disk.sled_id.into_untyped_uuid(), + slot: disk.slot, + vendor: disk.vendor, + model: disk.model.clone(), + serial: disk.serial.clone(), + variant: format!("{:?}", disk.variant), + firmware: active_firmware, + next_firmware, + } }); let table = tabled::Table::new(rows) From ded63b4bac9c23495334f1bd1bd6321d413dcd9c Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Mon, 12 Aug 2024 20:32:37 +0000 Subject: [PATCH 3/8] Bump CRDB version --- nexus/db-model/src/schema_versions.rs | 3 ++- schema/crdb/dbinit.sql | 2 +- schema/crdb/inventory-nvme-firmware/up01.sql | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 schema/crdb/inventory-nvme-firmware/up01.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index dd9c9dc667a..7f3a29f2177 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(86, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(87, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(87, "inventory-nvme-firmware"), KnownVersion::new(86, "snapshot-replacement"), KnownVersion::new(85, "add-migrations-by-time-created-index"), KnownVersion::new(84, "region-read-only"), diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 32ac9cc1ebc..d376e78cebf 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4243,7 +4243,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '86.0.0', NULL) + (TRUE, NOW(), NOW(), '87.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/inventory-nvme-firmware/up01.sql b/schema/crdb/inventory-nvme-firmware/up01.sql new file mode 100644 index 00000000000..984f036196a --- /dev/null +++ b/schema/crdb/inventory-nvme-firmware/up01.sql @@ -0,0 +1,15 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( + inv_collection_id UUID NOT NULL, + + sled_id UUID NOT NULL, + slot INT8 CHECK (slot >= 0) NOT NULL, + + number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + next_active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7), + slot1_is_read_only BOOLEAN, + slot_firmware_versions STRING(8)[], + CONSTRAINT check_slot_firmware_versions_len CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + + PRIMARY KEY (inv_collection_id, sled_id, slot) +); From 072fcacabd29e67ca5abd06fd48a63db2a62c2fc Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Wed, 14 Aug 2024 02:57:52 +0000 Subject: [PATCH 4/8] Address ixi's feedback --- dev-tools/omdb/src/bin/omdb/db.rs | 42 ++++++------------------------- nexus/db-model/src/inventory.rs | 25 ++++++++++++++++++ 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index c946bf1fe41..789e46327fa 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -4354,41 +4354,13 @@ async fn cmd_db_inventory_physical_disks( .context("loading physical disks")?; let rows = disks.into_iter().map(|(disk, firmware)| { - let mut active_firmware = "UNKNOWN".to_string(); - let mut next_firmware = String::new(); - - 'firmware: { - // Ensure we have matching firmware information for a physical disk. - let Some(firmware) = firmware else { - break 'firmware; - }; - - // Attempt to read the fw version for the active slot. - if let Some(slot_info) = &firmware - .slot_firmware_versions - .get(firmware.active_slot.0 as usize - 1) - { - if let Some(fw_ver) = slot_info { - active_firmware = fw_ver.clone(); - } - }; - - // If we don't have firmware staged for next reset break early. - let Some(next_slot) = firmware.next_active_slot else { - break 'firmware; + let (active_firmware, next_firmware) = + if let Some(firmware) = firmware.as_ref() { + (firmware.current_version(), firmware.next_version()) + } else { + (None, None) }; - // We have firmware staged for the next reset so attempt to read - // the fw version. 
- if let Some(slot_info) = - &firmware.slot_firmware_versions.get(next_slot.0 as usize - 1) - { - if let Some(fw_ver) = slot_info { - next_firmware = fw_ver.clone(); - } - } - } - DiskRow { inv_collection_id: disk.inv_collection_id.into_untyped_uuid(), sled_id: disk.sled_id.into_untyped_uuid(), @@ -4397,8 +4369,8 @@ async fn cmd_db_inventory_physical_disks( model: disk.model.clone(), serial: disk.serial.clone(), variant: format!("{:?}", disk.variant), - firmware: active_firmware, - next_firmware, + firmware: active_firmware.unwrap_or("UNKNOWN").to_string(), + next_firmware: next_firmware.unwrap_or("").to_string(), } }); diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index fdcf20c853d..42320ab98a8 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -918,6 +918,31 @@ impl InvNvmeDiskFirmware { }), } } + + /// Attempt to read the current firmware version. + pub fn current_version(&self) -> Option<&str> { + match self.active_slot.0 { + // be paranoid that we have a value within the NVMe spec + slot @ 1..=7 => self + .slot_firmware_versions + .get(usize::from(slot) - 1) + .and_then(|v| v.as_deref()), + _ => None, + } + } + + /// Attempt to read the staged firmware version that will be active upon + /// next device reset. + pub fn next_version(&self) -> Option<&str> { + match self.next_active_slot { + // be paranoid that we have a value within the NVMe spec + Some(slot) if slot.0 <= 7 && slot.0 >= 1 => self + .slot_firmware_versions + .get(usize::from(slot.0) - 1) + .and_then(|v| v.as_deref()), + _ => None, + } + } } /// See [`nexus_types::inventory::Zpool`]. From 2e66ded24c14761d4dab3102ccfef955d8ec1343 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Wed, 28 Aug 2024 21:10:17 +0000 Subject: [PATCH 5/8] Address Sean's feedback modulo testing --- nexus/db-model/src/inventory.rs | 34 ++++++------- .../db-queries/src/db/datastore/inventory.rs | 49 ++++++++++--------- schema/crdb/dbinit.sql | 1 + 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 6c63e044d4b..4f0ff60f186 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -32,8 +32,8 @@ use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZonesConfig, }; use nexus_types::inventory::{ - BaseboardId, Caboose, Collection, PhysicalDiskFirmware, PowerState, - RotPage, RotSlot, + BaseboardId, Caboose, Collection, NvmeFirmware, PowerState, RotPage, + RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; use omicron_uuid_kinds::CollectionKind; @@ -897,25 +897,21 @@ pub struct InvNvmeDiskFirmware { } impl InvNvmeDiskFirmware { - pub fn try_from_physical_disk( + pub fn new( inv_collection_id: CollectionUuid, sled_id: SledUuid, - disk: nexus_types::inventory::PhysicalDisk, - ) -> Option { - match disk.firmware { - PhysicalDiskFirmware::Unknown => None, - PhysicalDiskFirmware::Nvme(firmware) => Some(Self { - inv_collection_id: inv_collection_id.into(), - sled_id: sled_id.into(), - slot: disk.slot, - active_slot: firmware.active_slot.into(), - next_active_slot: firmware - .next_active_slot - .map(|nas| nas.into()), - number_of_slots: firmware.number_of_slots.into(), - slot1_is_read_only: firmware.slot1_is_read_only, - slot_firmware_versions: firmware.slot_firmware_versions, - }), + sled_slot: i64, + firmware: &NvmeFirmware, + ) -> Self { + Self { + inv_collection_id: inv_collection_id.into(), + sled_id: sled_id.into(), + slot: sled_slot, + 
active_slot: firmware.active_slot.into(), + next_active_slot: firmware.next_active_slot.map(|nas| nas.into()), + number_of_slots: firmware.number_of_slots.into(), + slot1_is_read_only: firmware.slot1_is_read_only, + slot_firmware_versions: firmware.slot_firmware_versions.clone(), } } diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index d72a480f076..ca2e460bc03 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -61,6 +61,7 @@ use nexus_db_model::SwCaboose; use nexus_db_model::SwRotPage; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; +use nexus_types::inventory::PhysicalDiskFirmware; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; use omicron_common::api::external::LookupType; @@ -137,20 +138,22 @@ impl DataStore { .collect::, Error>>()?; // Pull disk firmware out of sled agents - let disk_firmware: Vec<_> = collection - .sled_agents - .iter() - .flat_map(|(sled_id, sled_agent)| { - sled_agent.disks.iter().map(|disk| { - InvNvmeDiskFirmware::try_from_physical_disk( - collection_id, - *sled_id, - disk.clone(), - ) - }) - }) - .flatten() - .collect(); + let mut nvme_disk_firmware = Vec::new(); + for (sled_id, sled_agent) in &collection.sled_agents { + for disk in &sled_agent.disks { + match &disk.firmware { + PhysicalDiskFirmware::Unknown => (), + PhysicalDiskFirmware::Nvme(firmware) => { + nvme_disk_firmware.push(InvNvmeDiskFirmware::new( + collection_id, + *sled_id, + disk.slot, + firmware, + )); + } + } + } + } // Pull disks out of all sled agents let disks: Vec<_> = collection @@ -749,9 +752,9 @@ impl DataStore { use db::schema::inv_nvme_disk_firmware::dsl; let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); - let mut disk_firmware = disk_firmware.into_iter(); + let mut nvme_disk_firmware = nvme_disk_firmware.into_iter(); loop { - let some_disk_firmware = disk_firmware + let some_disk_firmware = nvme_disk_firmware .by_ref() .take(batch_size) .collect::>(); @@ -1574,13 +1577,13 @@ impl DataStore { // Mapping of "Sled ID" -> "Mapping of physical slot -> disk firmware" let disk_firmware: BTreeMap< - Uuid, + SledUuid, BTreeMap, > = { use db::schema::inv_nvme_disk_firmware::dsl; let mut disk_firmware = BTreeMap::< - Uuid, + SledUuid, BTreeMap, >::new(); let mut paginator = Paginator::new(batch_size); @@ -1601,7 +1604,7 @@ impl DataStore { p.found_batch(&batch, &|row| (row.sled_id, row.slot)); for firmware in batch { disk_firmware - .entry(firmware.sled_id.into_untyped_uuid()) + .entry(firmware.sled_id.into()) .or_default() .insert( firmware.slot, @@ -1628,13 +1631,13 @@ impl DataStore { // Mapping of "Sled ID" -> "All disks reported by that sled" let physical_disks: BTreeMap< - Uuid, + SledUuid, Vec, > = { use db::schema::inv_physical_disk::dsl; let mut disks = BTreeMap::< - Uuid, + SledUuid, Vec, >::new(); let mut paginator = Paginator::new(batch_size); @@ -1654,7 +1657,7 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| (row.sled_id, row.slot)); for disk in batch { - let sled_id = disk.sled_id.into_untyped_uuid(); + let sled_id = disk.sled_id.into(); let firmware = disk_firmware .get(&sled_id) .and_then(|lookup| lookup.get(&disk.slot)) @@ -1807,7 +1810,7 @@ impl DataStore { usable_physical_ram: s.usable_physical_ram.into(), reservoir_size: s.reservoir_size.into(), disks: physical_disks - .get(sled_id.as_untyped_uuid()) + .get(&sled_id.into()) .map(|disks| disks.to_vec()) 
.unwrap_or_default(), zpools: zpools diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 8c9aee80924..dc0a29971b3 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3192,6 +3192,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( -- PK consisting of: -- - Which collection this was -- - The sled reporting the disk + -- - The slot in which the disk was found PRIMARY KEY (inv_collection_id, sled_id, slot) ); From f08a94cdb9502b1c53398c6359432cc0075a0b94 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Fri, 30 Aug 2024 18:55:59 +0000 Subject: [PATCH 6/8] Remove NVMe firmware inventory collection --- nexus/db-queries/src/db/datastore/inventory.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index ca2e460bc03..3b767a804f2 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1179,6 +1179,7 @@ impl DataStore { nrot_pages, nsled_agents, nphysical_disks, + nnvme_disk_disk_firmware, nsled_agent_zones, nzones, nnics, @@ -1263,6 +1264,17 @@ impl DataStore { .await? }; + // Remove rows for NVMe physical disk firmware found. + let nnvme_disk_firwmare = + { + use db::schema::inv_nvme_disk_firmware::dsl; + diesel::delete(dsl::inv_nvme_disk_firmware.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + // Remove rows associated with Omicron zones let nsled_agent_zones = { @@ -1323,6 +1335,7 @@ impl DataStore { nrot_pages, nsled_agents, nphysical_disks, + nnvme_disk_firwmare, nsled_agent_zones, nzones, nnics, @@ -1347,6 +1360,7 @@ impl DataStore { "nrot_pages" => nrot_pages, "nsled_agents" => nsled_agents, "nphysical_disks" => nphysical_disks, + "nnvme_disk_firmware" => nnvme_disk_disk_firmware, "nsled_agent_zones" => nsled_agent_zones, "nzones" => nzones, "nnics" => nnics, From fdf5ab48a2d080656f60f07e09e1bd2541442cad Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Sat, 31 Aug 2024 18:35:33 +0000 Subject: [PATCH 7/8] clippy lint --- nexus/db-queries/src/db/datastore/inventory.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 6d7a2801378..26834baa74b 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1824,7 +1824,7 @@ impl DataStore { usable_physical_ram: s.usable_physical_ram.into(), reservoir_size: s.reservoir_size.into(), disks: physical_disks - .get(&sled_id.into()) + .get(&sled_id) .map(|disks| disks.to_vec()) .unwrap_or_default(), zpools: zpools From 633cc23a7c1b2462ddfeddf68a68c20469a243f1 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Fri, 30 Aug 2024 18:55:59 +0000 Subject: [PATCH 8/8] Add some tests --- nexus/db-model/src/inventory.rs | 148 ++++++++++++ .../db-queries/src/db/datastore/inventory.rs | 211 ++++++++++++++++++ schema/crdb/dbinit.sql | 7 +- schema/crdb/inventory-nvme-firmware/up01.sql | 7 +- 4 files changed, 365 insertions(+), 8 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 07374f7761f..5637f987819 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -1546,3 +1546,151 @@ impl InvOmicronZoneNic { zone_nic.into_network_interface_for_zone(zone_id) } } + +#[cfg(test)] +mod test { + use omicron_uuid_kinds::TypedUuid; + + use crate::{typed_uuid, 
InvNvmeDiskFirmware}; + + #[test] + fn test_inv_nvme_disk_firmware() { + // We can retrieve active_firmware with sane values + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: None, + number_of_slots: 1.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("firmware".to_string())], + }; + assert_eq!(inv_firmware.current_version(), Some("firmware")); + assert_eq!(inv_firmware.next_version(), None); + + // We can retrieve active_firmware and next_active_firmware with sane + // values + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: Some(2.into()), + number_of_slots: 2.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![ + Some("ONE".to_string()), + Some("TWO".to_string()), + ], + }; + assert_eq!(inv_firmware.current_version(), Some("ONE")); + assert_eq!(inv_firmware.next_version(), Some("TWO")); + + // A missing fw version string returns None + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: Some(2.into()), + number_of_slots: 2.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("ONE".to_string()), None], + }; + assert_eq!(inv_firmware.current_version(), Some("ONE")); + assert_eq!(inv_firmware.next_version(), None); + + // Number of slots and slot firmware versions disagreement returns None + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: Some(4.into()), + number_of_slots: 4.into(), + slot1_is_read_only: true, + // number_of_slots reports 4 but the device only gave us two slots + slot_firmware_versions: vec![Some("ONE".to_string()), None], + }; + assert_eq!(inv_firmware.current_version(), Some("ONE")); + assert_eq!(inv_firmware.next_version(), None); + + // Active slot is below 1 + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 0.into(), + next_active_slot: None, + number_of_slots: 1.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("ONE".to_string())], + }; + assert_eq!(inv_firmware.current_version(), None); + assert_eq!(inv_firmware.next_version(), None); + + // Active slot is above 7 + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 8.into(), + next_active_slot: None, + number_of_slots: 8.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![ + None, + None, + None, + None, + None, + None, + None, + Some("EIGHT".to_string()), + ], + }; + assert_eq!(inv_firmware.current_version(), None); + assert_eq!(inv_firmware.next_version(), None); + + // Next active slot is below 1 + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: Some(0.into()), + number_of_slots: 2.into(), + slot1_is_read_only: true, + 
slot_firmware_versions: vec![ + Some("ONE".to_string()), + Some("TWO".to_string()), + ], + }; + assert_eq!(inv_firmware.current_version(), Some("ONE")); + assert_eq!(inv_firmware.next_version(), None); + + // Next active slot is above 7 + let inv_firmware = InvNvmeDiskFirmware { + inv_collection_id: typed_uuid::DbTypedUuid(TypedUuid::new_v4()), + sled_id: TypedUuid::new_v4().into(), + slot: 1, + active_slot: 1.into(), + next_active_slot: Some(8.into()), + number_of_slots: 8.into(), + slot1_is_read_only: true, + slot_firmware_versions: vec![ + None, + None, + None, + None, + None, + None, + None, + Some("EIGHT".to_string()), + ], + }; + assert_eq!(inv_firmware.current_version(), None); + assert_eq!(inv_firmware.next_version(), None); + } +} diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 26834baa74b..a5afd03e68c 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -2825,4 +2825,215 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + /// While other tests mostly exercise inserting [`PhysicalDisk`]s with + /// NVMe firmware we want additional checks on some constraints that the + /// `inv_nvme_disk_firwmare` table sets. + #[tokio::test] + async fn test_nvme_firmware_inventory_insert() { + use nexus_types::external_api::params::PhysicalDiskKind; + use nexus_types::inventory::{ + Collection, NvmeFirmware, PhysicalDisk, PhysicalDiskFirmware, + }; + + fn add_disk_with_firmware_to_collection( + active_slot: u8, + next_active_slot: Option, + number_of_slots: u8, + slot1_is_read_only: bool, + slot_firmware_versions: Vec>, + ) -> Collection { + let Representative { builder, .. } = representative(); + let mut collection = builder.build(); + + let disk = PhysicalDisk { + identity: omicron_common::disk::DiskIdentity { + vendor: "mike".to_string(), + model: "buffalo".to_string(), + serial: "716".to_string(), + }, + variant: PhysicalDiskKind::U2, + + slot: 1, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot, + next_active_slot, + number_of_slots, + slot1_is_read_only, + slot_firmware_versions, + }), + }; + + // Remove all disks on the first sled-agent and replace it with our + // new physical disk. 
+ std::mem::swap(&mut collection + .sled_agents + .first_entry() + .expect("collection built from representative should have a sled agent") + .into_mut() + .disks, + &mut vec![disk]); + + collection + } + + let logctx = dev::test_setup_log("nvme_firmware_inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Verify we don't allow firmware with no firmware slots + let collection = add_disk_with_firmware_to_collection( + 1, + None, + 0, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with 0 slots"); + + // Verify we don't allow firmware with too many slots + let collection = add_disk_with_firmware_to_collection( + 1, + None, + 8, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with 8 slots"); + + // Verify we don't allow an active_slot of 0 + let collection = add_disk_with_firmware_to_collection( + 0, + None, + 1, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with active_slot 0"); + + // Verify we don't allow an active_slot larger than 7 + let collection = add_disk_with_firmware_to_collection( + 8, + None, + 1, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with active_slot 8"); + + // Verify we don't allow an next_active_slot of 0 + let collection = add_disk_with_firmware_to_collection( + 1, + Some(0), + 1, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with next_active_slot 0"); + + // Verify we don't allow an next_active_slot larger than 7 + let collection = add_disk_with_firmware_to_collection( + 1, + Some(8), + 1, + true, + vec![Some("TEST".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect_err("cannot insert firmware with next_active_slot 8"); + + // XXX omicron#6514 + // - We would like to deny insertion when a firmware version string is + // larger than the NVMe spec would allow but we have discoverd that our + // version of CRDB will silently truncate a string to size bound rather + // than erroring. + // - The NVMe spec defines the firmware versions as an array of 7 + // entries made up of 8 characters (ascii data) so unless the device + // firmware has a bug or the rust libnvme binding has a bug the worst we + // should see is the truncated value. + // - The CRDB STRING type is checking unicode length rather than byte + // length. However, CRDB won't let us use the BYTES type with an ARRAY. 
+ let collection = add_disk_with_firmware_to_collection( + 1, + None, + 1, + true, + vec![Some("FIRMWARETOOLARGE".to_string())], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("firmware inserted modulo version string truncation"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection.id) + .await + .expect("failed to read collection back"); + let (_, value) = &collection_read + .sled_agents + .first_key_value() + .expect("we have a sled-agent in the collection"); + let disk = + value.disks.first().expect("sled-agent has the disk we inserted"); + let PhysicalDiskFirmware::Nvme(ref firmware) = disk.firmware else { + panic!( + "Didn't find the expected NVMe firmware that we just inserted" + ) + }; + assert_eq!( + firmware.slot_firmware_versions, + vec![Some("FIRMWARE".to_string())], + "Firmware versions contains the truncated string" + ); + + // Finally lets actually insert a valid configuration and assert that we + // can pull it back out correctly. + // + // - 3 slots + // - 1st slot is active + // - 1st slot is marked read-only + // - 2nd slot is marked active on next reset + // - 3rd slot has no firmware uploaded into it + let collection = add_disk_with_firmware_to_collection( + 1, + Some(2), + 3, + true, + vec![Some("FOO".to_string()), Some("BAR".to_string()), None], + ); + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("firmware inserted"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection.id) + .await + .expect("failed to read collection back"); + assert_eq!(collection, collection_read); + + // NOTE that there is no validation that the bounds checked values + // in the database make sense. For example a device that reports + // a next_active_slot of 2 but only has a value of 1 in the + // number_of_slots field. These configurations are guarded against by + // the `InvNvmeDiskFirmware` type itself. 
+ + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index d4eec6ed884..443a55b3710 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3187,15 +3187,14 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( -- total number of firmware slots the device has number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, - active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + active_slot INT2 CHECK (active_slot BETWEEN 1 AND 7) NOT NULL, -- staged firmware slot to be active on reset - next_active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7), + next_active_slot INT2 CHECK (next_active_slot BETWEEN 1 AND 7), -- slot1 is distinct in the NVMe spec in the sense that it can be read only slot1_is_read_only BOOLEAN, -- the firmware version string for each NVMe slot (0 indexed), a NULL means the -- slot exists but is empty - slot_firmware_versions STRING(8)[], - CONSTRAINT check_slot_firmware_versions_len CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + slot_firmware_versions STRING(8)[] CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), -- PK consisting of: -- - Which collection this was diff --git a/schema/crdb/inventory-nvme-firmware/up01.sql b/schema/crdb/inventory-nvme-firmware/up01.sql index 984f036196a..0191fd31f10 100644 --- a/schema/crdb/inventory-nvme-firmware/up01.sql +++ b/schema/crdb/inventory-nvme-firmware/up01.sql @@ -5,11 +5,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( slot INT8 CHECK (slot >= 0) NOT NULL, number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, - active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, - next_active_slot INT2 CHECK (number_of_slots BETWEEN 1 AND 7), + active_slot INT2 CHECK (active_slot BETWEEN 1 AND 7) NOT NULL, + next_active_slot INT2 CHECK (next_active_slot BETWEEN 1 AND 7), slot1_is_read_only BOOLEAN, - slot_firmware_versions STRING(8)[], - CONSTRAINT check_slot_firmware_versions_len CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + slot_firmware_versions STRING(8)[] CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), PRIMARY KEY (inv_collection_id, sled_id, slot) );
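
A note on the slot numbering that several pieces of this series rely on (the
`current_version()`/`next_version()` helpers, the omdb rendering in patch 4,
and the CHECK constraints on `inv_nvme_disk_firmware`): the NVMe spec numbers
firmware slots 1 through 7, while `slot_firmware_versions` is a plain
0-indexed vector, so slot `n` lives at index `n - 1`, and an empty or
out-of-range slot resolves to no version. The following standalone sketch is
not part of the patches above and the function name is illustrative; it only
demonstrates the mapping convention:

    // Illustrative sketch of the slot convention used by this series:
    // NVMe firmware slots are numbered 1-7, but the version strings are
    // stored 0-indexed, so slot `n` maps to `versions[n - 1]`.
    fn version_for_slot(versions: &[Option<String>], slot: u8) -> Option<&str> {
        match slot {
            // Stay within the 1-7 range that the NVMe spec (and the CHECK
            // constraints on inv_nvme_disk_firmware) allow.
            1..=7 => versions.get(usize::from(slot) - 1)?.as_deref(),
            _ => None,
        }
    }

    fn main() {
        // Slot 1 holds "ONE", slot 2 holds "TWO", slot 3 exists but is empty.
        let versions =
            vec![Some("ONE".to_string()), Some("TWO".to_string()), None];
        assert_eq!(version_for_slot(&versions, 1), Some("ONE"));
        assert_eq!(version_for_slot(&versions, 2), Some("TWO"));
        assert_eq!(version_for_slot(&versions, 3), None); // empty slot
        assert_eq!(version_for_slot(&versions, 8), None); // outside the spec range
    }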