From 00e68110484172d3a9166fc5b1ad0713e900b888 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 6 May 2024 15:06:30 -0400 Subject: [PATCH] Allow recommissioning of previously-decommissioned sleds --- nexus/db-model/src/schema_versions.rs | 3 +- .../src/sled_underlay_subnet_allocation.rs | 2 +- nexus/db-queries/src/db/datastore/rack.rs | 200 ++++++++++++++++-- nexus/src/app/rack.rs | 4 +- .../up1.sql | 2 + .../up2.sql | 1 + .../up3.sql | 3 + .../up4.sql | 1 + schema/crdb/dbinit.sql | 25 ++- 9 files changed, 205 insertions(+), 36 deletions(-) create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 5a263ea536..afdf91074e 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(61, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(62, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(62, "allocate-subnet-decommissioned-sleds"), KnownVersion::new(61, "blueprint-add-sled-state"), KnownVersion::new(60, "add-lookup-vmm-by-sled-id-index"), KnownVersion::new(59, "enforce-first-as-default"), diff --git a/nexus/db-model/src/sled_underlay_subnet_allocation.rs b/nexus/db-model/src/sled_underlay_subnet_allocation.rs index 8dae9da4b8..3cb9579f1b 100644 --- a/nexus/db-model/src/sled_underlay_subnet_allocation.rs +++ b/nexus/db-model/src/sled_underlay_subnet_allocation.rs @@ -8,7 +8,7 @@ use omicron_uuid_kinds::SledKind; use uuid::Uuid; /// Underlay allocation for a sled added to an initialized rack -#[derive(Queryable, Insertable, Debug, Clone, Selectable)] +#[derive(Queryable, Insertable, Debug, Clone, PartialEq, Eq, Selectable)] #[diesel(table_name = sled_underlay_subnet_allocation)] pub struct SledUnderlaySubnetAllocation { pub rack_id: Uuid, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 8e8913f7bd..e2fbaa2f19 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -21,6 +21,7 @@ use crate::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use crate::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET; use crate::db::identity::Asset; +use crate::db::lookup::LookupPath; use crate::db::model::Dataset; use crate::db::model::IncompleteExternalIp; use crate::db::model::PhysicalDisk; @@ -41,6 +42,7 @@ use nexus_db_model::InitialDnsGroup; use nexus_db_model::PasswordHashString; use nexus_db_model::SiloUser; use nexus_db_model::SiloUserPasswordHash; +use nexus_db_model::SledState; use nexus_db_model::SledUnderlaySubnetAllocation; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; @@ -183,8 +185,8 @@ impl From for Error { pub enum SledUnderlayAllocationResult { /// A new allocation 
was created New(SledUnderlaySubnetAllocation), - /// A prior allocation was found - Existing(SledUnderlaySubnetAllocation), + /// A prior allocation associated with a commissioned sled was found + CommissionedSled(SledUnderlaySubnetAllocation), } impl DataStore { @@ -327,8 +329,45 @@ impl DataStore { }; for allocation in allocations { if allocation.hw_baseboard_id == new_allocation.hw_baseboard_id { - // We already have an allocation for this sled. - return Ok(SledUnderlayAllocationResult::Existing(allocation)); + // We already have an allocation for this sled, but we need to + // check whether this allocation matches a sled that has been + // decommissioned. (The same physical sled, tracked by + // `hw_baseboard_id`, can be logically removed from the control + // plane via decommissioning, then added back again later, which + // requires allocating a new subnet.) + match LookupPath::new(opctx, self) + .sled_id(allocation.sled_id.into_untyped_uuid()) + .optional_fetch_for(authz::Action::Read) + .await? + .map(|(_, sled)| sled.state()) + { + Some(SledState::Active) => { + // This allocation is for a sled that is still an + // active (commissioned) member of the control plane; + // return the existing allocation. + return Ok( + SledUnderlayAllocationResult::CommissionedSled( + allocation, + ), + ); + } + Some(SledState::Decommissioned) => { + // This allocation was for a now-decommissioned sled; + // ignore it and keep searching. + } + None => { + // This allocation is still "new" in the sense that it + // is assigned to a sled that has not yet upserted + // itself to join the control plane. We must return + // `::New(_)` here to ensure idempotence of allocation + // (e.g., if we allocate a sled, but its sled-agent + // crashes before it can upsert itself, we need to be + // able to get the same allocation back again). 
+ return Ok(SledUnderlayAllocationResult::New( + allocation, + )); + } + } } if allocation.subnet_octet == new_allocation.subnet_octet { bail_unless!( @@ -962,7 +1001,6 @@ mod test { }; use crate::db::datastore::test_utils::datastore_test; use crate::db::datastore::Discoverability; - use crate::db::lookup::LookupPath; use crate::db::model::ExternalIp; use crate::db::model::IpKind; use crate::db::model::IpPoolRange; @@ -1190,8 +1228,7 @@ mod test { logctx.cleanup_successful(); } - async fn create_test_sled(db: &DataStore) -> Sled { - let sled_id = Uuid::new_v4(); + async fn create_test_sled(db: &DataStore, sled_id: Uuid) -> Sled { let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); let sled_update = SledUpdate::new( sled_id, @@ -1270,9 +1307,9 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled1 = create_test_sled(&datastore).await; - let sled2 = create_test_sled(&datastore).await; - let sled3 = create_test_sled(&datastore).await; + let sled1 = create_test_sled(&datastore, Uuid::new_v4()).await; + let sled2 = create_test_sled(&datastore, Uuid::new_v4()).await; + let sled3 = create_test_sled(&datastore, Uuid::new_v4()).await; let service_ip_pool_ranges = vec![IpRange::try_from(( Ipv4Addr::new(1, 2, 3, 4), @@ -1621,7 +1658,7 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled = create_test_sled(&datastore).await; + let sled = create_test_sled(&datastore, Uuid::new_v4()).await; // Ask for two Nexus services, with different external IPs. 
let nexus_ip_start = Ipv4Addr::new(1, 2, 3, 4); @@ -1904,7 +1941,7 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled = create_test_sled(&datastore).await; + let sled = create_test_sled(&datastore, Uuid::new_v4()).await; let mut system = SystemDescription::new(); system @@ -2000,7 +2037,7 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled = create_test_sled(&datastore).await; + let sled = create_test_sled(&datastore, Uuid::new_v4()).await; let ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)); let service_ip_pool_ranges = vec![IpRange::from(ip)]; @@ -2256,7 +2293,9 @@ mod test { SledUnderlayAllocationResult::New(allocation) => { allocation.subnet_octet } - SledUnderlayAllocationResult::Existing(allocation) => { + SledUnderlayAllocationResult::CommissionedSled( + allocation, + ) => { panic!("unexpected allocation {allocation:?}"); } }, @@ -2276,9 +2315,9 @@ mod test { ); // If we attempt to insert the same baseboards again, we should get the - // existing allocations back. - for (hw_baseboard_id, expected_octet) in - hw_baseboard_ids.into_iter().zip(expected) + // same new allocations back. + for (&hw_baseboard_id, prev_allocation) in + hw_baseboard_ids.iter().zip(&allocations) { match datastore .allocate_sled_underlay_subnet_octets( @@ -2288,17 +2327,134 @@ mod test { ) .await .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => { + assert_eq!(allocation, *prev_allocation); + } + SledUnderlayAllocationResult::CommissionedSled(allocation) => { + panic!("unexpected allocation {allocation:?}"); + } + } + } + + // Pick one of the hw_baseboard_ids and insert a sled record. We should + // get back the `CommissionedSled` allocation result if we retry + // allocation of that baseboard. 
+ create_test_sled( + &datastore, + allocations[0].sled_id.into_untyped_uuid(), + ) + .await; + match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + hw_baseboard_ids[0], + ) + .await + .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => { + panic!("unexpected allocation {allocation:?}"); + } + SledUnderlayAllocationResult::CommissionedSled(allocation) => { + assert_eq!(allocation, allocations[0]); + } + } + + // If we attempt to insert the same baseboard again and that baseboard + // is only assigned to decommissioned sleds, we should get a new + // allocation. We'll pick one hw baseboard ID, create a `Sled` for it, + // decommission that sled, and confirm we get a new octet, five times in + // a loop (to emulate the same sled being added and decommissioned + // multiple times). + let mut next_expected_octet = *expected.last().unwrap() + 1; + let mut prior_allocation = allocations.last().unwrap().clone(); + let target_hw_baseboard_id = *hw_baseboard_ids.last().unwrap(); + for _ in 0..5 { + // Commission the sled. + let sled = create_test_sled( + &datastore, + prior_allocation.sled_id.into_untyped_uuid(), + ) + .await; + + // If we attempt this same baseboard again, we get the existing + // allocation back. + match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + target_hw_baseboard_id, + ) + .await + .unwrap() { SledUnderlayAllocationResult::New(allocation) => { panic!("unexpected allocation {allocation:?}"); } - SledUnderlayAllocationResult::Existing(allocation) => { - assert_eq!( - allocation.subnet_octet, expected_octet, - "unexpected octet for {allocation:?}" - ); + SledUnderlayAllocationResult::CommissionedSled(existing) => { + assert_eq!(existing, prior_allocation); } } + + // Decommission the sled. 
+ let (authz_sled,) = LookupPath::new(&opctx, &datastore) + .sled_id(sled.id()) + .lookup_for(authz::Action::Modify) + .await + .expect("found target sled ID"); + datastore + .sled_set_policy_to_expunged(&opctx, &authz_sled) + .await + .expect("expunged sled"); + datastore + .sled_set_state_to_decommissioned(&opctx, &authz_sled) + .await + .expect("decommissioned sled"); + + // Attempt a new allocation for the same hw_baseboard_id. + let allocation = match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + target_hw_baseboard_id, + ) + .await + .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => allocation, + SledUnderlayAllocationResult::CommissionedSled(allocation) => { + panic!("unexpected existing allocation {allocation:?}"); + } + }; + + // We should get the next octet with a new sled ID. + assert_eq!(allocation.subnet_octet, next_expected_octet); + assert_ne!(allocation.sled_id.into_untyped_uuid(), sled.id()); + prior_allocation = allocation; + + // Ensure if we attempt this same baseboard again, we get the + // same allocation back (the sled hasn't been commissioned yet). + match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + target_hw_baseboard_id, + ) + .await + .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => { + assert_eq!(prior_allocation, allocation); + } + SledUnderlayAllocationResult::CommissionedSled(existing) => { + panic!("unexpected allocation {existing:?}"); + } + } + + // Bump our expectations for the next iteration. + next_expected_octet += 1; } db.cleanup().await.unwrap(); diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 25c0824ce6..c766446f38 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -790,11 +790,11 @@ impl super::Nexus { .await? 
{ SledUnderlayAllocationResult::New(allocation) => allocation, - SledUnderlayAllocationResult::Existing(allocation) => { + SledUnderlayAllocationResult::CommissionedSled(allocation) => { return Err(Error::ObjectAlreadyExists { type_name: ResourceType::Sled, object_name: format!( - "{} / {} ({})", + "{} ({}): {}", sled.serial, sled.part, allocation.sled_id ), }); diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql new file mode 100644 index 0000000000..adffd4a2cf --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.sled_underlay_subnet_allocation + ALTER PRIMARY KEY USING COLUMNS (hw_baseboard_id, sled_id); diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql new file mode 100644 index 0000000000..ba67d093f4 --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS sled_underlay_subnet_allocation_hw_baseboard_id_key CASCADE; diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql new file mode 100644 index 0000000000..f96b3312c9 --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql @@ -0,0 +1,3 @@ +CREATE UNIQUE INDEX IF NOT EXISTS commissioned_sled_uniqueness + ON omicron.public.sled (serial_number, part_number) + WHERE sled_state != 'decommissioned'; diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql new file mode 100644 index 0000000000..9489a61c2a --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS serial_part_revision_unique CASCADE; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index e66f28d74f..fa0c74aac2 100644 --- a/schema/crdb/dbinit.sql 
+++ b/schema/crdb/dbinit.sql @@ -148,15 +148,18 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( sled_state omicron.public.sled_state NOT NULL, /* Generation number owned and incremented by the sled-agent */ - sled_agent_gen INT8 NOT NULL DEFAULT 1, - - -- This constraint should be upheld, even for deleted disks - -- in the fleet. - CONSTRAINT serial_part_revision_unique UNIQUE ( - serial_number, part_number, revision - ) + sled_agent_gen INT8 NOT NULL DEFAULT 1 ); +-- Add an index that ensures a given physical sled (identified by serial and +-- part number) can only be a commissioned member of the control plane once. +-- +-- TODO Should `sled` reference `hw_baseboard_id` instead of having its own +-- serial/part columns? +CREATE UNIQUE INDEX IF NOT EXISTS commissioned_sled_uniqueness + ON omicron.public.sled (serial_number, part_number) + WHERE sled_state != 'decommissioned'; + /* Add an index which lets us look up sleds on a rack */ CREATE UNIQUE INDEX IF NOT EXISTS lookup_sled_by_rack ON omicron.public.sled ( rack_id, @@ -222,7 +225,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_resource_by_sled ON omicron.public.sled CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( -- The physical identity of the sled -- (foreign key into `hw_baseboard_id` table) - hw_baseboard_id UUID PRIMARY KEY, + hw_baseboard_id UUID, -- The rack to which a sled is being added -- (foreign key into `rack` table) @@ -240,7 +243,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( -- The octet that extends a /56 rack subnet to a /64 sled subnet -- -- Always between 33 and 255 inclusive - subnet_octet INT2 NOT NULL UNIQUE CHECK (subnet_octet BETWEEN 33 AND 255) + subnet_octet INT2 NOT NULL UNIQUE CHECK (subnet_octet BETWEEN 33 AND 255), + + PRIMARY KEY (hw_baseboard_id, sled_id) ); -- Add an index which allows pagination by {rack_id, sled_id} pairs. 
@@ -3856,7 +3861,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '61.0.0', NULL) + (TRUE, NOW(), NOW(), '62.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;