From a5e29992fc79970324974fcee7451fdc79c2510d Mon Sep 17 00:00:00 2001
From: John Gallagher
Date: Tue, 14 May 2024 15:59:08 -0400
Subject: [PATCH] Allow recommissioning of previously-decommissioned sleds (#5733)

This is a pretty small delta on top of #5698 that I've been sitting on for a
few days before getting a chance to test it on a4x2 this afternoon. I set up
a4x2 with three sleds (g0, g1, g3) and went through 3.5 add/remove cycles on
sled g2 (3 add/remove pairs and a 4th add). g2 is present in `omdb db sleds`:

```
root@oxz_switch:~# omdb db sleds
SERIAL  IP                             ROLE      POLICY      STATE   ID
g3      [fd00:1122:3344:103::1]:12345  scrimlet  in service  active  13b8d30d-b66d-4333-b156-9aa2527e130b
g2      [fd00:1122:3344:124::1]:12345  -         in service  active  218e4e9c-0a27-4460-a65c-7e93bef531c4
g1      [fd00:1122:3344:102::1]:12345  -         in service  active  661e9e3e-0beb-43fe-9606-109b7145b258
g0      [fd00:1122:3344:101::1]:12345  scrimlet  in service  active  9f19235f-bfa4-4032-9cad-608448b4f1a0
```

I added `SledFilter::Decommissioned` specifically for use with omdb, where we
can see the 3 previous times `g2` was present (each with a different sled ID):

```
root@oxz_switch:~# omdb db sleds -F decommissioned
SERIAL  IP                             ROLE  POLICY    STATE           ID
g2      [fd00:1122:3344:121::1]:12345  -     expunged  decommissioned  a7b09236-c8ba-4025-934d-b5f0539d9379
g2      [fd00:1122:3344:123::1]:12345  -     expunged  decommissioned  ec0e81bf-6366-4273-97bc-47223334dc90
g2      [fd00:1122:3344:122::1]:12345  -     expunged  decommissioned  f5f8ba44-9681-4e6f-84f2-fd654c83dd23
```

The current blueprint shows the history of all four times the sled has been
present, because we don't currently prune expunged zones:

```
root@oxz_switch:~# omdb nexus blueprints show current
blueprint 5bfb182e-df8a-41e5-a8a3-862bb42f8feb
parent: 5b2823c8-ca41-46b5-9efd-ffaae0e6e028
-----------------------------------------------------------------------------------------------
  zone type        zone ID                                disposition   underlay IP
-----------------------------------------------------------------------------------------------

  sled 13b8d30d-b66d-4333-b156-9aa2527e130b: blueprint zones at generation 5
    clickhouse       9ee21438-4a2f-499a-b867-58993a36f2f0   in service   fd00:1122:3344:103::6
    cockroach_db     b0f22c64-a8e7-4c83-af26-f30a0dc413d5   in service   fd00:1122:3344:103::3
    crucible         29f3db85-722f-41ed-a66c-66aa31a31591   in service   fd00:1122:3344:103::b
    crucible         40317028-eedf-487f-88ae-821632f05f39   in service   fd00:1122:3344:103::8
    crucible         50153da9-da61-4cd9-829c-18129e5a6c52   in service   fd00:1122:3344:103::9
    crucible         765dc0e2-1850-43b1-ae08-531d51772f14   in service   fd00:1122:3344:103::a
    crucible         bcc91ace-f816-400d-9198-22ed20e00ca3   in service   fd00:1122:3344:103::c
    crucible_pantry  bf1c3443-fbf5-4e16-affb-ee4e2598cbcb   in service   fd00:1122:3344:103::7
    external_dns     ed00e862-094c-449b-bfdb-ddba26f36bb2   in service   fd00:1122:3344:103::4
    internal_dns     6b4f3315-ce80-4d89-a495-ff96c5f573cd   in service   fd00:1122:3344:3::1
    internal_ntp     6b9d837b-9bb9-46f1-8f48-c0c8a7d89882   in service   fd00:1122:3344:103::d
    nexus            327c035e-628e-4880-b434-8d77ca362f21   in service   fd00:1122:3344:103::5

  sled 218e4e9c-0a27-4460-a65c-7e93bef531c4: blueprint zones at generation 3
    crucible         5a7167ff-04ef-47fd-93bc-2adc7c8a7087   in service   fd00:1122:3344:124::26
    crucible         9ca15af1-79dc-4248-86d1-5d7d25987609   in service   fd00:1122:3344:124::23
    crucible         a80accc8-3fdd-42b2-8425-eb246a7f0ba0   in service   fd00:1122:3344:124::22
    crucible         d80a6050-f2e3-4827-8fd0-37c5c88ac87d   in service   fd00:1122:3344:124::25
    crucible         e92e8930-6f81-4f09-a520-415ef896a05f   in service   fd00:1122:3344:124::24
    internal_ntp     a37275e8-eb02-4cb3-b216-a47a892313e7   in service   fd00:1122:3344:124::21

  sled 661e9e3e-0beb-43fe-9606-109b7145b258: blueprint zones at generation 5
    boundary_ntp     8eb79dcb-b6a0-4a24-835e-cf9e55b12495   in service   fd00:1122:3344:102::d
    cockroach_db     294868f1-d76b-4ef6-a87b-399ec06ba9a3   in service   fd00:1122:3344:102::3
    cockroach_db     373fa1d2-597c-4245-a87b-31a7f734c806   in service   fd00:1122:3344:102::4
    crucible         00611d19-0752-4e6f-a57a-43df54089987   in service   fd00:1122:3344:102::c
    crucible         05697e11-b936-4319-b1a6-ba8e46c77988   in service   fd00:1122:3344:102::b
    crucible         612ad32d-321d-4159-9265-4c0eca972e2c   in service   fd00:1122:3344:102::9
    crucible         c03f67ee-7f38-445f-b9a5-dcfd1feb976c   in service   fd00:1122:3344:102::a
    crucible         e2e46038-8ecb-4fe0-be1a-94530842cd65   in service   fd00:1122:3344:102::8
    crucible_pantry  4fea76f9-d88e-43e9-ade7-2345dd77d2a7   in service   fd00:1122:3344:102::7
    internal_dns     504f14cf-42ea-4457-b922-fb400eea1495   in service   fd00:1122:3344:2::1
    nexus            8d3ba915-17f6-4c2f-a94d-31120e89f17b   in service   fd00:1122:3344:102::5
    oximeter         4d98514a-a352-4ddc-996f-912958d9a80d   in service   fd00:1122:3344:102::6

  sled 9f19235f-bfa4-4032-9cad-608448b4f1a0: blueprint zones at generation 5
    boundary_ntp     603a1fe0-1fd1-4fc1-968c-af95e1400f2d   in service   fd00:1122:3344:101::d
    cockroach_db     d0f66442-cc46-48b3-9770-d473517a81f5   in service   fd00:1122:3344:101::3
    cockroach_db     ed701854-eb03-43eb-8a7c-5c1aea73cc51   in service   fd00:1122:3344:101::4
    crucible         56151b6f-fa7d-4459-9540-6b04460d2c64   in service   fd00:1122:3344:101::8
    crucible         8b37469e-2660-498e-bad7-9c9158b4fbae   in service   fd00:1122:3344:101::9
    crucible         b602ce7d-2b92-4e12-8b95-dd2c4cad3397   in service   fd00:1122:3344:101::a
    crucible         d87cae8c-d85d-4a25-b7d6-f7bbce50f6e9   in service   fd00:1122:3344:101::b
    crucible         ea549438-9531-491e-b85b-b92668184c9f   in service   fd00:1122:3344:101::c
    crucible_pantry  51e18a3d-a4ce-4ee1-a900-3f23444c9bfd   in service   fd00:1122:3344:101::7
    external_dns     60f77e70-af63-4b4d-b3b2-62610f707d47   in service   fd00:1122:3344:101::5
    internal_dns     d47c3ade-ae61-49b0-8bb7-217a825517de   in service   fd00:1122:3344:1::1
    nexus            a1df997e-3595-4f24-979c-76232ae16164   in service   fd00:1122:3344:101::6

  sled a7b09236-c8ba-4025-934d-b5f0539d9379: blueprint zones at generation 4
    crucible         8aac1836-3d74-4d88-af41-b9e2fda8d3f7   expunged     fd00:1122:3344:121::23
    crucible         a0468efe-3f76-4244-9d92-03a70608c777   expunged     fd00:1122:3344:121::22
    crucible         afc45a35-0860-41d8-bac1-c06ac442fd43   expunged     fd00:1122:3344:121::24
    crucible         b596d696-6521-4ebb-b0b4-22a772f5d1d0   expunged     fd00:1122:3344:121::25
    crucible         b5f1851a-695f-4223-bc67-d30df30e77d0   expunged     fd00:1122:3344:121::26
    internal_ntp     219fdc9b-fe80-436a-9d81-c8f7bb3f0364   expunged     fd00:1122:3344:121::21

  sled ec0e81bf-6366-4273-97bc-47223334dc90: blueprint zones at generation 4
    crucible         5fe4544e-d44b-45fe-bf99-505007654e97   expunged     fd00:1122:3344:123::22
    crucible         76f11abf-cfbf-41ee-972b-1c38c6babd22   expunged     fd00:1122:3344:123::26
    crucible         87821b5d-ed10-4787-ab24-6b7411bcc5d8   expunged     fd00:1122:3344:123::25
    crucible         8ceb3b5b-eb61-4195-b873-6dac8108fd73   expunged     fd00:1122:3344:123::23
    crucible         c1d0cfb9-9489-4fce-a213-40fb5c1b7b0f   expunged     fd00:1122:3344:123::24
    internal_ntp     86a61155-e468-4906-b0b6-d12fa2f3f4dd   expunged     fd00:1122:3344:123::21

  sled f5f8ba44-9681-4e6f-84f2-fd654c83dd23: blueprint zones at generation 4
    crucible         4c82a05d-bbc0-40b2-8b85-f688b56b2f7b   expunged     fd00:1122:3344:122::22
    crucible         6b6a6770-f552-438d-bca0-b1d8bdd8c2ed   expunged     fd00:1122:3344:122::24
    crucible         83eccc0d-afea-4e27-a906-3a629c2094d1   expunged     fd00:1122:3344:122::23
    crucible         8f6cea3e-5489-4665-a37b-c6336c0e54c8   expunged     fd00:1122:3344:122::26
    crucible         bdd61a6c-50b8-450c-b963-443d77a1c7f4   expunged     fd00:1122:3344:122::25
    internal_ntp     ab8b9602-a861-4985-809d-30e4724a63ab   expunged     fd00:1122:3344:122::21

  METADATA:
    created by:            a1df997e-3595-4f24-979c-76232ae16164
    created at:            2024-05-10T19:09:09.547Z
    comment:               sled 218e4e9c-0a27-4460-a65c-7e93bef531c4: add zones
    internal DNS version:  11
    external DNS version:  2
```

Also visible in that blueprint is evidence that we went through the "add NTP"
-> "add crucible" reconfigurator steps. I did not go so far as to add a Nexus
to g2 each time (doing so is considerably more manual, which we might need to
address if that's something we want to test more thoroughly).

This also fixes a bug in `allocate_sled_underlay_subnet_octets` that I
accidentally introduced in #5675, restoring idempotence up until the point
where the sled has upserted itself. (The comments added should clarify this.)
We can and should still fail on an attempt to add a sled where we (a) have an
allocation and (b) have an entry in the `sled` table, but we no longer fail on
an attempt to add a sled where we (a) have an allocation but (b) do NOT have
an entry in the `sled` table.
---
 dev-tools/omdb/tests/usage_errors.out         |   2 +
 nexus/db-model/src/schema_versions.rs         |   3 +-
 .../src/sled_underlay_subnet_allocation.rs    |   2 +-
 nexus/db-queries/src/db/datastore/rack.rs     | 199 ++++++++++++++++--
 nexus/src/app/mod.rs                          |   5 +
 nexus/src/app/rack.rs                         |   4 +-
 nexus/tests/integration_tests/rack.rs         |  56 ++++-
 nexus/types/src/deployment/planning_input.rs  |  13 ++
 .../up1.sql                                   |   2 +
 .../up2.sql                                   |   1 +
 .../up3.sql                                   |   3 +
 .../up4.sql                                   |   1 +
 schema/crdb/dbinit.sql                        |  25 ++-
 13 files changed, 275 insertions(+), 41 deletions(-)
 create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql
 create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql
 create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql
 create mode 100644 schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql

diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out
index 3ffe579a23..15fc9d322e 100644
--- a/dev-tools/omdb/tests/usage_errors.out
+++ b/dev-tools/omdb/tests/usage_errors.out
@@ -279,6 +279,8 @@ Options:
 
           Possible values:
           - commissioned:   All sleds that are currently part of the control plane cluster
+          - decommissioned: All sleds that were previously part of the control plane cluster
+                            but have been decommissioned
           - discretionary:  Sleds that are eligible for discretionary services
           - in-service:     Sleds that are in service (even if they might not be eligible for
                             discretionary services)
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index 5a263ea536..afdf91074e 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(61, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(62, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // | leaving the first copy as an example for the next person.
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(62, "allocate-subnet-decommissioned-sleds"), KnownVersion::new(61, "blueprint-add-sled-state"), KnownVersion::new(60, "add-lookup-vmm-by-sled-id-index"), KnownVersion::new(59, "enforce-first-as-default"), diff --git a/nexus/db-model/src/sled_underlay_subnet_allocation.rs b/nexus/db-model/src/sled_underlay_subnet_allocation.rs index 8dae9da4b8..3cb9579f1b 100644 --- a/nexus/db-model/src/sled_underlay_subnet_allocation.rs +++ b/nexus/db-model/src/sled_underlay_subnet_allocation.rs @@ -8,7 +8,7 @@ use omicron_uuid_kinds::SledKind; use uuid::Uuid; /// Underlay allocation for a sled added to an initialized rack -#[derive(Queryable, Insertable, Debug, Clone, Selectable)] +#[derive(Queryable, Insertable, Debug, Clone, PartialEq, Eq, Selectable)] #[diesel(table_name = sled_underlay_subnet_allocation)] pub struct SledUnderlaySubnetAllocation { pub rack_id: Uuid, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 8e8913f7bd..04901c7785 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -21,6 +21,7 @@ use crate::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use crate::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET; use crate::db::identity::Asset; +use crate::db::lookup::LookupPath; use crate::db::model::Dataset; use crate::db::model::IncompleteExternalIp; use crate::db::model::PhysicalDisk; @@ -41,6 +42,7 @@ use nexus_db_model::InitialDnsGroup; use nexus_db_model::PasswordHashString; use nexus_db_model::SiloUser; use nexus_db_model::SiloUserPasswordHash; +use nexus_db_model::SledState; use nexus_db_model::SledUnderlaySubnetAllocation; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; @@ -183,8 +185,8 @@ impl From for Error { pub enum SledUnderlayAllocationResult { /// A new allocation was created New(SledUnderlaySubnetAllocation), - /// A prior allocation was found - Existing(SledUnderlaySubnetAllocation), + /// A prior allocation associated with a commissioned sled was found + CommissionedSled(SledUnderlaySubnetAllocation), } impl DataStore { @@ -327,8 +329,44 @@ impl DataStore { }; for allocation in allocations { if allocation.hw_baseboard_id == new_allocation.hw_baseboard_id { - // We already have an allocation for this sled. - return Ok(SledUnderlayAllocationResult::Existing(allocation)); + // We already have an allocation for this sled, but we need to + // check whether this allocation matches a sled that has been + // decommissioned. (The same physical sled, tracked by + // `hw_baseboard_id`, can be logically removed from the control + // plane via decommissioning, then added back again later, which + // requires allocating a new subnet.) + match LookupPath::new(opctx, self) + .sled_id(allocation.sled_id.into_untyped_uuid()) + .optional_fetch_for(authz::Action::Read) + .await? + .map(|(_, sled)| sled.state()) + { + Some(SledState::Active) => { + // This allocation is for an active sled; return the + // existing allocation. + return Ok( + SledUnderlayAllocationResult::CommissionedSled( + allocation, + ), + ); + } + Some(SledState::Decommissioned) => { + // This allocation was for a now-decommissioned sled; + // ignore it and keep searching. + } + None => { + // This allocation is still "new" in the sense that it + // is assigned to a sled that has not yet upserted + // itself to join the control plane. 
We must return + // `::New(_)` here to ensure idempotence of allocation + // (e.g., if we allocate a sled, but its sled-agent + // crashes before it can upsert itself, we need to be + // able to get the same allocation back again). + return Ok(SledUnderlayAllocationResult::New( + allocation, + )); + } + } } if allocation.subnet_octet == new_allocation.subnet_octet { bail_unless!( @@ -962,7 +1000,6 @@ mod test { }; use crate::db::datastore::test_utils::datastore_test; use crate::db::datastore::Discoverability; - use crate::db::lookup::LookupPath; use crate::db::model::ExternalIp; use crate::db::model::IpKind; use crate::db::model::IpPoolRange; @@ -1190,8 +1227,7 @@ mod test { logctx.cleanup_successful(); } - async fn create_test_sled(db: &DataStore) -> Sled { - let sled_id = Uuid::new_v4(); + async fn create_test_sled(db: &DataStore, sled_id: Uuid) -> Sled { let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); let sled_update = SledUpdate::new( sled_id, @@ -1270,9 +1306,9 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled1 = create_test_sled(&datastore).await; - let sled2 = create_test_sled(&datastore).await; - let sled3 = create_test_sled(&datastore).await; + let sled1 = create_test_sled(&datastore, Uuid::new_v4()).await; + let sled2 = create_test_sled(&datastore, Uuid::new_v4()).await; + let sled3 = create_test_sled(&datastore, Uuid::new_v4()).await; let service_ip_pool_ranges = vec![IpRange::try_from(( Ipv4Addr::new(1, 2, 3, 4), @@ -1621,7 +1657,7 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled = create_test_sled(&datastore).await; + let sled = create_test_sled(&datastore, Uuid::new_v4()).await; // Ask for two Nexus services, with different external IPs. let nexus_ip_start = Ipv4Addr::new(1, 2, 3, 4); @@ -1904,7 +1940,7 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled = create_test_sled(&datastore).await; + let sled = create_test_sled(&datastore, Uuid::new_v4()).await; let mut system = SystemDescription::new(); system @@ -2000,7 +2036,7 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled = create_test_sled(&datastore).await; + let sled = create_test_sled(&datastore, Uuid::new_v4()).await; let ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)); let service_ip_pool_ranges = vec![IpRange::from(ip)]; @@ -2256,7 +2292,9 @@ mod test { SledUnderlayAllocationResult::New(allocation) => { allocation.subnet_octet } - SledUnderlayAllocationResult::Existing(allocation) => { + SledUnderlayAllocationResult::CommissionedSled( + allocation, + ) => { panic!("unexpected allocation {allocation:?}"); } }, @@ -2276,9 +2314,9 @@ mod test { ); // If we attempt to insert the same baseboards again, we should get the - // existing allocations back. - for (hw_baseboard_id, expected_octet) in - hw_baseboard_ids.into_iter().zip(expected) + // same new allocations back. 
+ for (&hw_baseboard_id, prev_allocation) in + hw_baseboard_ids.iter().zip(&allocations) { match datastore .allocate_sled_underlay_subnet_octets( @@ -2288,17 +2326,134 @@ mod test { ) .await .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => { + assert_eq!(allocation, *prev_allocation); + } + SledUnderlayAllocationResult::CommissionedSled(allocation) => { + panic!("unexpected allocation {allocation:?}"); + } + } + } + + // Pick one of the hw_baseboard_ids and insert a sled record. We should + // get back the `CommissionedSled` allocation result if we retry + // allocation of that baseboard. + create_test_sled( + &datastore, + allocations[0].sled_id.into_untyped_uuid(), + ) + .await; + match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + hw_baseboard_ids[0], + ) + .await + .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => { + panic!("unexpected allocation {allocation:?}"); + } + SledUnderlayAllocationResult::CommissionedSled(allocation) => { + assert_eq!(allocation, allocations[0]); + } + } + + // If we attempt to insert the same baseboard again and that baseboard + // is only assigned to decommissioned sleds, we should get a new + // allocation. We'll pick one hw baseboard ID, create a `Sled` for it, + // decommission that sled, and confirm we get a new octet, five times in + // a loop (to emulate the same sled being added and decommissioned + // multiple times). + let mut next_expected_octet = *expected.last().unwrap() + 1; + let mut prior_allocation = allocations.last().unwrap().clone(); + let target_hw_baseboard_id = *hw_baseboard_ids.last().unwrap(); + for _ in 0..5 { + // Commission the sled. + let sled = create_test_sled( + &datastore, + prior_allocation.sled_id.into_untyped_uuid(), + ) + .await; + + // If we attempt this same baseboard again, we get the existing + // allocation back. + match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + target_hw_baseboard_id, + ) + .await + .unwrap() { SledUnderlayAllocationResult::New(allocation) => { panic!("unexpected allocation {allocation:?}"); } - SledUnderlayAllocationResult::Existing(allocation) => { - assert_eq!( - allocation.subnet_octet, expected_octet, - "unexpected octet for {allocation:?}" - ); + SledUnderlayAllocationResult::CommissionedSled(existing) => { + assert_eq!(existing, prior_allocation); } } + + // Decommission the sled. + let (authz_sled,) = LookupPath::new(&opctx, &datastore) + .sled_id(sled.id()) + .lookup_for(authz::Action::Modify) + .await + .expect("found target sled ID"); + datastore + .sled_set_policy_to_expunged(&opctx, &authz_sled) + .await + .expect("expunged sled"); + datastore + .sled_set_state_to_decommissioned(&opctx, &authz_sled) + .await + .expect("decommissioned sled"); + + // Attempt a new allocation for the same hw_baseboard_id. + let allocation = match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + target_hw_baseboard_id, + ) + .await + .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => allocation, + SledUnderlayAllocationResult::CommissionedSled(allocation) => { + panic!("unexpected existing allocation {allocation:?}"); + } + }; + + // We should get the next octet with a new sled ID. + assert_eq!(allocation.subnet_octet, next_expected_octet); + assert_ne!(allocation.sled_id.into_untyped_uuid(), sled.id()); + prior_allocation = allocation; + + // Ensure if we attempt this same baseboard again, we get the + // same allocation back (the sled hasn't been commissioned yet). 
+ match datastore + .allocate_sled_underlay_subnet_octets( + &opctx, + rack_id, + target_hw_baseboard_id, + ) + .await + .unwrap() + { + SledUnderlayAllocationResult::New(allocation) => { + assert_eq!(prior_allocation, allocation); + } + SledUnderlayAllocationResult::CommissionedSled(existing) => { + panic!("unexpected allocation {existing:?}"); + } + } + + // Bump our expectations for the next iteration. + next_expected_octet += 1; } db.cleanup().await.unwrap(); diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index a7f12d30cd..4b77788c96 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -534,6 +534,11 @@ impl Nexus { &self.id } + /// Return the rack ID for this Nexus instance. + pub fn rack_id(&self) -> Uuid { + self.rack_id + } + /// Return the tunable configuration parameters, e.g. for use in tests. pub fn tunables(&self) -> &Tunables { &self.tunables diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 25c0824ce6..c766446f38 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -790,11 +790,11 @@ impl super::Nexus { .await? { SledUnderlayAllocationResult::New(allocation) => allocation, - SledUnderlayAllocationResult::Existing(allocation) => { + SledUnderlayAllocationResult::CommissionedSled(allocation) => { return Err(Error::ObjectAlreadyExists { type_name: ResourceType::Sled, object_name: format!( - "{} / {} ({})", + "{} ({}): {}", sled.serial, sled.part, allocation.sled_id ), }); diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index a7ebf0f8b6..3e10ebcca4 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -5,6 +5,10 @@ use dropshot::ResultsPage; use http::Method; use http::StatusCode; +use nexus_client::types::SledId; +use nexus_db_model::SledBaseboard; +use nexus_db_model::SledSystemHardware; +use nexus_db_model::SledUpdate; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; @@ -17,7 +21,7 @@ use nexus_types::internal_api::params::SledAgentInfo; use nexus_types::internal_api::params::SledRole; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; -use omicron_nexus::TestInterfaces; +use omicron_uuid_kinds::GenericUuid; use uuid::Uuid; type ControlPlaneTestContext = @@ -170,7 +174,7 @@ async fn test_sled_add(cptestctx: &ControlPlaneTestContext) { // Add one of these sleds. let add_url = "/v1/system/hardware/sleds/"; let baseboard = uninitialized_sleds.pop().unwrap().baseboard; - NexusRequest::objects_post( + let sled_id = NexusRequest::objects_post( external_client, add_url, ¶ms::UninitializedSledId { @@ -179,11 +183,53 @@ async fn test_sled_add(cptestctx: &ControlPlaneTestContext) { }, ) .authn_as(AuthnMode::PrivilegedUser) - .execute() + .execute_and_parse_unwrap::() .await - .expect("failed to add sled"); + .id; - // Attempting to add the same sled again should fail. + // Attempting to add the same sled again should succeed with the same sled + // ID: this operation should be idempotent up until the point at which the + // sled is inserted in the db. + let repeat_sled_id = NexusRequest::objects_post( + external_client, + add_url, + ¶ms::UninitializedSledId { + serial: baseboard.serial.clone(), + part: baseboard.part.clone(), + }, + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::() + .await + .id; + assert_eq!(sled_id, repeat_sled_id); + + // Now upsert the sled. 
+ let nexus = &cptestctx.server.apictx().nexus; + nexus + .datastore() + .sled_upsert(SledUpdate::new( + sled_id.into_untyped_uuid(), + "[::1]:0".parse().unwrap(), + SledBaseboard { + serial_number: baseboard.serial.clone(), + part_number: baseboard.part.clone(), + revision: 0, + }, + SledSystemHardware { + is_scrimlet: false, + usable_hardware_threads: 8, + usable_physical_ram: (1 << 30).try_into().unwrap(), + reservoir_size: (1 << 20).try_into().unwrap(), + }, + nexus.rack_id(), + Generation::new().into(), + )) + .await + .expect("inserted sled"); + + // The sled has been commissioned as part of the rack, so adding it should + // fail. let error: dropshot::HttpErrorResponseBody = NexusRequest::expect_failure_with_body( external_client, diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 1975cfaae0..89d8bae660 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -250,6 +250,14 @@ pub enum SledFilter { /// fetch "all sleds regardless of current policy or state". Commissioned, + /// All sleds that were previously part of the control plane cluster but + /// have been decommissioned. + /// + /// Any sleds matching this filter are expected to no longer be present. + /// This filter is only useful for historical or debugging purposes, such as + /// listing decommissioned sleds via `omdb`. + Decommissioned, + /// Sleds that are eligible for discretionary services. Discretionary, @@ -312,6 +320,7 @@ impl SledPolicy { provision_policy: SledProvisionPolicy::Provisionable, } => match filter { SledFilter::Commissioned => true, + SledFilter::Decommissioned => false, SledFilter::Discretionary => true, SledFilter::InService => true, SledFilter::QueryDuringInventory => true, @@ -322,6 +331,7 @@ impl SledPolicy { provision_policy: SledProvisionPolicy::NonProvisionable, } => match filter { SledFilter::Commissioned => true, + SledFilter::Decommissioned => false, SledFilter::Discretionary => false, SledFilter::InService => true, SledFilter::QueryDuringInventory => true, @@ -330,6 +340,7 @@ impl SledPolicy { }, SledPolicy::Expunged => match filter { SledFilter::Commissioned => true, + SledFilter::Decommissioned => true, SledFilter::Discretionary => false, SledFilter::InService => false, SledFilter::QueryDuringInventory => false, @@ -360,6 +371,7 @@ impl SledState { match self { SledState::Active => match filter { SledFilter::Commissioned => true, + SledFilter::Decommissioned => false, SledFilter::Discretionary => true, SledFilter::InService => true, SledFilter::QueryDuringInventory => true, @@ -368,6 +380,7 @@ impl SledState { }, SledState::Decommissioned => match filter { SledFilter::Commissioned => false, + SledFilter::Decommissioned => true, SledFilter::Discretionary => false, SledFilter::InService => false, SledFilter::QueryDuringInventory => false, diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql new file mode 100644 index 0000000000..adffd4a2cf --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up1.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.sled_underlay_subnet_allocation + ALTER PRIMARY KEY USING COLUMNS (hw_baseboard_id, sled_id); diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql new file mode 100644 index 0000000000..ba67d093f4 --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up2.sql @@ 
-0,0 +1 @@ +DROP INDEX IF EXISTS sled_underlay_subnet_allocation_hw_baseboard_id_key CASCADE; diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql new file mode 100644 index 0000000000..f96b3312c9 --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up3.sql @@ -0,0 +1,3 @@ +CREATE UNIQUE INDEX IF NOT EXISTS commissioned_sled_uniqueness + ON omicron.public.sled (serial_number, part_number) + WHERE sled_state != 'decommissioned'; diff --git a/schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql b/schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql new file mode 100644 index 0000000000..9489a61c2a --- /dev/null +++ b/schema/crdb/allocate-subnet-decommissioned-sleds/up4.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS serial_part_revision_unique CASCADE; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index e66f28d74f..fa0c74aac2 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -148,15 +148,18 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( sled_state omicron.public.sled_state NOT NULL, /* Generation number owned and incremented by the sled-agent */ - sled_agent_gen INT8 NOT NULL DEFAULT 1, - - -- This constraint should be upheld, even for deleted disks - -- in the fleet. - CONSTRAINT serial_part_revision_unique UNIQUE ( - serial_number, part_number, revision - ) + sled_agent_gen INT8 NOT NULL DEFAULT 1 ); +-- Add an index that ensures a given physical sled (identified by serial and +-- part number) can only be a commissioned member of the control plane once. +-- +-- TODO Should `sled` reference `hw_baseboard_id` instead of having its own +-- serial/part columns? +CREATE UNIQUE INDEX IF NOT EXISTS commissioned_sled_uniqueness + ON omicron.public.sled (serial_number, part_number) + WHERE sled_state != 'decommissioned'; + /* Add an index which lets us look up sleds on a rack */ CREATE UNIQUE INDEX IF NOT EXISTS lookup_sled_by_rack ON omicron.public.sled ( rack_id, @@ -222,7 +225,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_resource_by_sled ON omicron.public.sled CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( -- The physical identity of the sled -- (foreign key into `hw_baseboard_id` table) - hw_baseboard_id UUID PRIMARY KEY, + hw_baseboard_id UUID, -- The rack to which a sled is being added -- (foreign key into `rack` table) @@ -240,7 +243,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( -- The octet that extends a /56 rack subnet to a /64 sled subnet -- -- Always between 33 and 255 inclusive - subnet_octet INT2 NOT NULL UNIQUE CHECK (subnet_octet BETWEEN 33 AND 255) + subnet_octet INT2 NOT NULL UNIQUE CHECK (subnet_octet BETWEEN 33 AND 255), + + PRIMARY KEY (hw_baseboard_id, sled_id) ); -- Add an index which allows pagination by {rack_id, sled_id} pairs. @@ -3856,7 +3861,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '61.0.0', NULL) + (TRUE, NOW(), NOW(), '62.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;
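
An illustrative sketch (not part of the patch): the heart of the schema change is that sled
uniqueness is now enforced only for commissioned sleds, via the partial
`commissioned_sled_uniqueness` index, rather than the old unconditional
`serial_part_revision_unique` constraint. The example below runs against a scratch CockroachDB
database and uses a cut-down stand-in for `omicron.public.sled` (the real table has many more
columns and an enum type for `sled_state`); the serial/part values are made up.

```
-- Cut-down stand-in for omicron.public.sled: just enough columns to show how
-- the partial unique index behaves. Not the real table definition.
CREATE TABLE sled (
    id UUID PRIMARY KEY,
    serial_number TEXT NOT NULL,
    part_number TEXT NOT NULL,
    sled_state TEXT NOT NULL  -- 'active' | 'decommissioned' (an enum in dbinit.sql)
);

-- Same shape as the index added in up3.sql / dbinit.sql.
CREATE UNIQUE INDEX commissioned_sled_uniqueness
    ON sled (serial_number, part_number)
    WHERE sled_state != 'decommissioned';

-- First commissioning of physical sled 'g2' succeeds.
INSERT INTO sled VALUES (gen_random_uuid(), 'g2', 'example-part', 'active');

-- Adding the same serial/part while it is still commissioned violates the
-- partial index and is rejected (duplicate key error).
INSERT INTO sled VALUES (gen_random_uuid(), 'g2', 'example-part', 'active');

-- After the sled is expunged and decommissioned, its row no longer matches
-- the index predicate, so the same physical sled can be commissioned again
-- under a new sled ID.
UPDATE sled SET sled_state = 'decommissioned' WHERE serial_number = 'g2';
INSERT INTO sled VALUES (gen_random_uuid(), 'g2', 'example-part', 'active');
```

Under the old `serial_part_revision_unique` constraint (dropped in up4.sql), that final INSERT
would also have been rejected, which is what previously blocked recommissioning a
decommissioned sled.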