From 81f28dc4d50caf6e68f1826d5138ee30e7afb4da Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sat, 18 Nov 2023 00:27:15 +0000 Subject: [PATCH] [Nexus] Add a sled to an initialized rack This commit provides an external API for adding a sled to an already initialized rack. --- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/rack.rs | 19 +- nexus/db-model/src/schema.rs | 10 ++ .../src/sled_underlay_subnet_allocation.rs | 16 ++ .../db-queries/src/db/datastore/inventory.rs | 20 +++ nexus/db-queries/src/db/datastore/rack.rs | 58 +++++++ nexus/src/app/rack.rs | 164 ++++++++++++++++-- nexus/src/external_api/http_entrypoints.rs | 25 +++ nexus/types/src/inventory.rs | 8 + schema/crdb/12.0.0/up1.sql | 37 ++++ schema/crdb/12.0.0/up2.sql | 5 + schema/crdb/dbinit.sql | 47 ++++- 12 files changed, 376 insertions(+), 35 deletions(-) create mode 100644 nexus/db-model/src/sled_underlay_subnet_allocation.rs create mode 100644 schema/crdb/12.0.0/up1.sql create mode 100644 schema/crdb/12.0.0/up2.sql diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 6b65eb87ec..ac5bad26f8 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -72,6 +72,7 @@ mod sled; mod sled_instance; mod sled_resource; mod sled_resource_kind; +mod sled_underlay_subnet_allocation; mod snapshot; mod ssh_key; mod switch; @@ -153,6 +154,7 @@ pub use sled::*; pub use sled_instance::*; pub use sled_resource::*; pub use sled_resource_kind::*; +pub use sled_underlay_subnet_allocation::*; pub use snapshot::*; pub use ssh_key::*; pub use switch::*; diff --git a/nexus/db-model/src/rack.rs b/nexus/db-model/src/rack.rs index f2bc7528d2..580ec155b4 100644 --- a/nexus/db-model/src/rack.rs +++ b/nexus/db-model/src/rack.rs @@ -4,9 +4,8 @@ use crate::schema::rack; use db_macros::Asset; -use ipnetwork::{IpNetwork, Ipv6Network}; +use ipnetwork::IpNetwork; use nexus_types::{external_api::views, identity::Asset}; -use omicron_common::api; use uuid::Uuid; /// Information about a local rack. 
@@ -29,22 +28,6 @@ impl Rack { rack_subnet: None, } } - - pub fn subnet(&self) -> Result { - match self.rack_subnet { - Some(IpNetwork::V6(subnet)) => Ok(subnet), - Some(IpNetwork::V4(_)) => { - return Err(api::external::Error::InternalError { - internal_message: "rack subnet not IPv6".into(), - }) - } - None => { - return Err(api::external::Error::InternalError { - internal_message: "rack subnet not set".into(), - }) - } - } - } } impl From for views::Rack { diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 4844f2a33f..b33fde00af 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -754,6 +754,16 @@ table! { } } +table! { + sled_underlay_subnet_allocation (rack_id, sled_id) { + rack_id -> Uuid, + sled_id -> Uuid, + subnet_octet -> Int2, + hw_baseboard_id -> Uuid, + } +} +allow_tables_to_appear_in_same_query!(rack, sled_underlay_subnet_allocation); + table! { switch (id) { id -> Uuid, diff --git a/nexus/db-model/src/sled_underlay_subnet_allocation.rs b/nexus/db-model/src/sled_underlay_subnet_allocation.rs new file mode 100644 index 0000000000..4da0bea669 --- /dev/null +++ b/nexus/db-model/src/sled_underlay_subnet_allocation.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::schema::sled_underlay_subnet_allocation; +use uuid::Uuid; + +/// Underlay allocation for a sled added to an initialized rack +#[derive(Queryable, Insertable, Debug, Clone, Selectable)] +#[diesel(table_name = sled_underlay_subnet_allocation)] +pub struct SledUnderlaySubnetAllocation { + pub rack_id: Uuid, + pub sled_id: Uuid, + pub subnet_octet: i16, + pub hw_baseboard_id: Uuid, +} diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index b743d28ee8..0ccdcff9ab 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -39,6 +39,7 @@ use nexus_db_model::InvServiceProcessor; use nexus_db_model::SpType; use nexus_db_model::SpTypeEnum; use nexus_db_model::SwCaboose; +use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; @@ -798,6 +799,25 @@ impl DataStore { Ok(()) } + // Find the primary key for `hw_baseboard_id` given a `BaseboardId` + pub async fn find_hw_baseboard_id( + &self, + opctx: &OpContext, + baseboard_id: BaseboardId, + ) -> Result { + opctx.authorize(authz::Action::Read, &authz::INVENTORY).await?; + let conn = self.pool_connection_authorized(opctx).await?; + use db::schema::hw_baseboard_id::dsl; + dsl::hw_baseboard_id + .filter(dsl::serial_number.eq(baseboard_id.serial_number)) + .filter(dsl::part_number.eq(baseboard_id.part_number)) + .select(dsl::id) + .limit(1) + .first_async::(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Attempt to read the latest collection while limiting queries to `limit` /// records pub async fn inventory_get_latest_collection( diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index ae982d86f8..df0c6dc24b 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ 
b/nexus/db-queries/src/db/datastore/rack.rs @@ -41,6 +41,7 @@ use nexus_db_model::InitialDnsGroup; use nexus_db_model::PasswordHashString; use nexus_db_model::SiloUser; use nexus_db_model::SiloUserPasswordHash; +use nexus_db_model::SledUnderlaySubnetAllocation; use nexus_types::external_api::params as external_params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::IdentityType; @@ -214,6 +215,63 @@ impl DataStore { Ok(()) } + // Return the subnet for the rack + pub async fn rack_subnet( + &self, + opctx: &OpContext, + rack_id: Uuid, + ) -> Result { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let conn = self.pool_connection_authorized(opctx).await?; + use db::schema::rack::dsl; + // It's safe to unwrap the returned `rack_subnet` because + // we filter on `rack_subnet.is_not_null()` + dsl::rack + .filter(dsl::id.eq(rack_id)) + .filter(dsl::rack_subnet.is_not_null()) + .select(dsl::rack_subnet) + .limit(1) + .first_async::>(&*conn) + .await + .map(|net| net.unwrap()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Return all current underlay allocations for the rack. + /// + /// Order allocations by `subnet_octet` + pub async fn rack_subnet_allocations( + &self, + opctx: &OpContext, + rack_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::sled_underlay_subnet_allocation::dsl as subnet_dsl; + subnet_dsl::sled_underlay_subnet_allocation + .filter(subnet_dsl::rack_id.eq(rack_id)) + .select(SledUnderlaySubnetAllocation::as_select()) + .order_by(subnet_dsl::subnet_octet.asc()) + .load_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Store a new sled subnet allocation in the database + pub async fn sled_subnet_allocation_insert( + &self, + opctx: &OpContext, + allocation: &SledUnderlaySubnetAllocation, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use db::schema::sled_underlay_subnet_allocation::dsl; + diesel::insert_into(dsl::sled_underlay_subnet_allocation) + .values(allocation.clone()) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) + } + // The following methods which return a `TxnError` take a `conn` parameter // which comes from the transaction created in `rack_set_initialized`. diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 1c2e49e260..f48bf10499 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -10,9 +10,10 @@ use crate::external_api::params::CertificateCreate; use crate::external_api::shared::ServiceUsingCertificate; use crate::internal_api::params::RackInitializationRequest; use gateway_client::types::SpType; -use ipnetwork::IpNetwork; +use ipnetwork::{IpNetwork, Ipv6Network}; use nexus_db_model::DnsGroup; use nexus_db_model::InitialDnsGroup; +use nexus_db_model::SledUnderlaySubnetAllocation; use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed}; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; @@ -36,6 +37,7 @@ use nexus_types::external_api::views; use nexus_types::external_api::views::Baseboard; use nexus_types::external_api::views::UninitializedSled; use nexus_types::internal_api::params::DnsRecord; +use omicron_common::address::{get_64_subnet, Ipv6Subnet, RACK_PREFIX}; use omicron_common::api::external::AddressLotKind; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; @@ -45,7 +47,11 @@ use omicron_common::api::external::LookupResult; use 
omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::internal::shared::ExternalPortDiscovery; +use omicron_common::bail_unless; +use sled_agent_client::types::AddSledRequest; use sled_agent_client::types::EarlyNetworkConfigBody; +use sled_agent_client::types::StartSledAgentRequest; +use sled_agent_client::types::StartSledAgentRequestBody; use sled_agent_client::types::{ BgpConfig, BgpPeerConfig, EarlyNetworkConfig, PortConfigV1, RackNetworkConfigV1, RouteConfig as SledRouteConfig, @@ -584,20 +590,7 @@ impl super::Nexus { if rack.rack_subnet.is_some() { return Ok(()); } - let addr = self - .sled_list(opctx, &DataPageParams::max_page()) - .await? - .get(0) - .ok_or(Error::InternalError { - internal_message: "no sleds at time of bootstore sync".into(), - })? - .address(); - - let sa = sled_agent_client::Client::new( - &format!("http://{}", addr), - self.log.clone(), - ); - + let sa = self.get_any_sled_agent(opctx).await?; let result = sa .read_network_bootstore_config_cache() .await @@ -619,7 +612,7 @@ impl super::Nexus { opctx: &OpContext, ) -> Result { let rack = self.rack_lookup(opctx, &self.rack_id).await?; - let subnet = rack.subnet()?; + let subnet = rack_subnet(rack.rack_subnet)?; let db_ports = self.active_port_settings(opctx).await?; let mut ports = Vec::new(); @@ -767,4 +760,143 @@ impl super::Nexus { uninitialized_sleds.retain(|s| !sled_baseboards.contains(&s.baseboard)); Ok(uninitialized_sleds) } + + /// Add a sled to an initialized rack + pub(crate) async fn add_sled_to_initialized_rack( + &self, + opctx: &OpContext, + sled: UninitializedSled, + ) -> Result<(), Error> { + let baseboard_id = sled.baseboard.clone().into(); + let hw_baseboard_id = + self.db_datastore.find_hw_baseboard_id(opctx, baseboard_id).await?; + + // Fetch all the existing allocations for `sled.rack_id` + let allocations = self + .db_datastore + .rack_subnet_allocations(opctx, sled.rack_id) + .await?; + + // Calculate the allocation
for the new sled by choosing the minimum + // octet. The returned allocations are ordered by octet, so we will know + // when we have a free one. However, if we already have an allocation + // for the given sled then reuse that one. + // TODO: This could all actually be done in SQL using a `next_item` query. + // See https://github.com/oxidecomputer/omicron/issues/4544 + const MIN_SUBNET_OCTET: i16 = 33; + let mut new_allocation = SledUnderlaySubnetAllocation { + rack_id: sled.rack_id, + sled_id: Uuid::new_v4(), + subnet_octet: MIN_SUBNET_OCTET, + hw_baseboard_id, + }; + let subnet = self.db_datastore.rack_subnet(opctx, sled.rack_id).await?; + let mut allocation_already_exists = false; + for allocation in allocations { + if allocation.hw_baseboard_id == new_allocation.hw_baseboard_id { + // We already have an allocation for this sled. + new_allocation = allocation; + allocation_already_exists = true; + break; + } + if allocation.subnet_octet == new_allocation.subnet_octet { + bail_unless!( + new_allocation.subnet_octet < 255, + "Too many sled subnets allocated" + ); + new_allocation.subnet_octet += 1; + } + } + let rack_subnet = + Ipv6Subnet::::from(rack_subnet(Some(subnet))?); + + // Write the new allocation row to CRDB. The UNIQUE constraint + // on `subnet_octet` will prevent dueling administrators reusing + // allocations when sleds are being added. We will need another + // mechanism ala generation numbers when we must interleave additions + // and removals of sleds.
+ if !allocation_already_exists { + self.db_datastore + .sled_subnet_allocation_insert(opctx, &new_allocation) + .await?; + } + + // Convert the baseboard as necessary + let baseboard = sled_agent_client::types::Baseboard::Gimlet { + identifier: sled.baseboard.serial.clone(), + model: sled.baseboard.serial.clone(), + revision: sled.baseboard.revision, + }; + + // Make the call to sled-agent + let req = AddSledRequest { + sled_id: baseboard, + start_request: StartSledAgentRequest { + generation: 0, + schema_version: 1, + body: StartSledAgentRequestBody { + id: new_allocation.sled_id, + rack_id: new_allocation.rack_id, + use_trust_quorum: true, + is_lrtq_learner: true, + subnet: sled_agent_client::types::Ipv6Subnet { + net: get_64_subnet( + rack_subnet, + new_allocation.subnet_octet.try_into().unwrap(), + ) + .net() + .into(), + }, + }, + }, + }; + let sa = self.get_any_sled_agent(opctx).await?; + sa.add_sled_to_initialized_rack(&req).await.map_err(|e| { + Error::InternalError { + internal_message: format!( + "failed to add sled with baseboard {:?} to rack {}: {e}", + sled.baseboard, new_allocation.rack_id + ), + } + })?; + + Ok(()) + } + + async fn get_any_sled_agent( + &self, + opctx: &OpContext, + ) -> Result { + let addr = self + .sled_list(opctx, &DataPageParams::max_page()) + .await? + .get(0) + .ok_or(Error::InternalError { + internal_message: "no sleds at time of bootstore sync".into(), + })? 
+ .address(); + + Ok(sled_agent_client::Client::new( + &format!("http://{}", addr), + self.log.clone(), + )) + } +} + +pub fn rack_subnet( + rack_subnet: Option, +) -> Result { + match rack_subnet { + Some(IpNetwork::V6(subnet)) => Ok(subnet), + Some(IpNetwork::V4(_)) => { + return Err(Error::InternalError { + internal_message: "rack subnet not IPv6".into(), + }) + } + None => { + return Err(Error::InternalError { + internal_message: "rack subnet not set".into(), + }) + } + } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 428632bcf5..cf243b60ea 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -223,6 +223,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(switch_list)?; api.register(switch_view)?; api.register(uninitialized_sled_list)?; + api.register(add_sled_to_initialized_rack)?; api.register(user_builtin_list)?; api.register(user_builtin_view)?; @@ -4401,6 +4402,30 @@ async fn uninitialized_sled_list( }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Add a sled to an initialized rack +// +// TODO: In the future this should really be a PUT request, once we resolve +// https://github.com/oxidecomputer/omicron/issues/4494. It should also +// explicitly be tied to a rack via a `rack_id` path param. For now we assume +// we are only operating on single rack systems. 
+#[endpoint { + method = POST, + path = "/v1/system/hardware/sleds/", + tags = ["system/hardware"] +}] +async fn add_sled_to_initialized_rack( + rqctx: RequestContext>, + sled: TypedBody, +) -> Result { + let apictx = rqctx.context(); + let nexus = &apictx.nexus; + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + nexus.add_sled_to_initialized_rack(&opctx, sled.into_inner()).await?; + Ok(HttpResponseUpdatedNoContent()) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} // Sleds diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 112eec3a65..a838ba634a 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -20,6 +20,8 @@ use std::sync::Arc; use strum::EnumIter; use uuid::Uuid; +use crate::external_api::views::Baseboard; + /// Results of collecting hardware/software inventory from various Omicron /// components /// @@ -108,6 +110,12 @@ pub struct BaseboardId { pub serial_number: String, } +impl From for BaseboardId { + fn from(value: Baseboard) -> Self { + BaseboardId { part_number: value.part, serial_number: value.serial } + } +} + /// Caboose contents found during a collection /// /// These are normalized in the database. Each distinct `Caboose` is assigned a diff --git a/schema/crdb/12.0.0/up1.sql b/schema/crdb/12.0.0/up1.sql new file mode 100644 index 0000000000..450f8d42ef --- /dev/null +++ b/schema/crdb/12.0.0/up1.sql @@ -0,0 +1,37 @@ +-- Table of all sled subnets allocated for sleds added to an already initialized +-- rack. The sleds in this table and their allocated subnets are created before +-- a sled is added to the `sled` table. Addition to the `sled` table occurs +-- after the sled is initialized and notifies Nexus about itself. +-- +-- For simplicity and space savings, this table doesn't actually contain the +-- full subnets for a given sled, but only the octet that extends a /56 rack +-- subnet to a /64 sled subnet. 
The rack subnet is maintained in the `rack` +-- table. +-- +-- This table does not include subnet octets allocated during RSS and therefore +-- all of the octets start at 33. This makes the data in this table purely additive +-- post-RSS, which also implies that we cannot re-use subnet octets if an original +-- sled that was part of RSS was removed from the cluster. +CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( + -- The physical identity of the sled + -- (foreign key into `hw_baseboard_id` table) + hw_baseboard_id UUID PRIMARY KEY, + + -- The rack to which a sled is being added + -- (foreign key into `rack` table) + -- + -- We require this because the sled is not yet part of the sled table when + -- we first allocate a subnet for it. + rack_id UUID NOT NULL, + + -- The sled to which a subnet is being allocated + -- + -- Eventually will be a foreign key into the `sled` table when the sled notifies nexus + -- about itself after initialization. + sled_id UUID NOT NULL, + + -- The octet that extends a /56 rack subnet to a /64 sled subnet + -- + -- Always between 33 and 255 inclusive + subnet_octet INT2 NOT NULL UNIQUE +); diff --git a/schema/crdb/12.0.0/up2.sql b/schema/crdb/12.0.0/up2.sql new file mode 100644 index 0000000000..c3e18fa166 --- /dev/null +++ b/schema/crdb/12.0.0/up2.sql @@ -0,0 +1,5 @@ +-- Add an index which allows pagination by {rack_id, sled_id} pairs. +CREATE UNIQUE INDEX IF NOT EXISTS lookup_subnet_allocation_by_rack_and_sled ON omicron.public.sled_underlay_subnet_allocation ( + rack_id, + sled_id +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index a74cabfe6e..b28ce8ed3f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -158,6 +158,51 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_resource_by_sled ON omicron.public.sled id ); + +-- Table of all sled subnets allocated for sleds added to an already initialized +-- rack. 
The sleds in this table and their allocated subnets are created before +-- a sled is added to the `sled` table. Addition to the `sled` table occurs +-- after the sled is initialized and notifies Nexus about itself. +-- +-- For simplicity and space savings, this table doesn't actually contain the +-- full subnets for a given sled, but only the octet that extends a /56 rack +-- subnet to a /64 sled subnet. The rack subnet is maintained in the `rack` +-- table. +-- +-- This table does not include subnet octets allocated during RSS and therefore +-- all of the octets start at 33. This makes the data in this table purely additive +-- post-RSS, which also implies that we cannot re-use subnet octets if an original +-- sled that was part of RSS was removed from the cluster. +CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( + -- The physical identity of the sled + -- (foreign key into `hw_baseboard_id` table) + hw_baseboard_id UUID PRIMARY KEY, + + -- The rack to which a sled is being added + -- (foreign key into `rack` table) + -- + -- We require this because the sled is not yet part of the sled table when + -- we first allocate a subnet for it. + rack_id UUID NOT NULL, + + -- The sled to which a subnet is being allocated + -- + -- Eventually will be a foreign key into the `sled` table when the sled notifies nexus + -- about itself after initialization. + sled_id UUID NOT NULL, + + -- The octet that extends a /56 rack subnet to a /64 sled subnet + -- + -- Always between 33 and 255 inclusive + subnet_octet INT2 NOT NULL UNIQUE +); + +-- Add an index which allows pagination by {rack_id, sled_id} pairs. 
+CREATE UNIQUE INDEX IF NOT EXISTS lookup_subnet_allocation_by_rack_and_sled ON omicron.public.sled_underlay_subnet_allocation ( + rack_id, + sled_id +); + /* * Switches */ @@ -2906,7 +2951,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '11.0.0', NULL) + ( TRUE, NOW(), NOW(), '12.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;