Skip to content

Commit

Permalink
[Nexus] Add a sled to an initialized rack
Browse files Browse the repository at this point in the history
This commit provides an external API for adding a sled to an already
initialized rack.
  • Loading branch information
andrewjstone committed Nov 21, 2023
1 parent cd2d23b commit 81f28dc
Show file tree
Hide file tree
Showing 12 changed files with 376 additions and 35 deletions.
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ mod sled;
mod sled_instance;
mod sled_resource;
mod sled_resource_kind;
mod sled_underlay_subnet_allocation;
mod snapshot;
mod ssh_key;
mod switch;
Expand Down Expand Up @@ -153,6 +154,7 @@ pub use sled::*;
pub use sled_instance::*;
pub use sled_resource::*;
pub use sled_resource_kind::*;
pub use sled_underlay_subnet_allocation::*;
pub use snapshot::*;
pub use ssh_key::*;
pub use switch::*;
Expand Down
19 changes: 1 addition & 18 deletions nexus/db-model/src/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@

use crate::schema::rack;
use db_macros::Asset;
use ipnetwork::{IpNetwork, Ipv6Network};
use ipnetwork::IpNetwork;
use nexus_types::{external_api::views, identity::Asset};
use omicron_common::api;
use uuid::Uuid;

/// Information about a local rack.
Expand All @@ -29,22 +28,6 @@ impl Rack {
rack_subnet: None,
}
}

pub fn subnet(&self) -> Result<Ipv6Network, api::external::Error> {
match self.rack_subnet {
Some(IpNetwork::V6(subnet)) => Ok(subnet),
Some(IpNetwork::V4(_)) => {
return Err(api::external::Error::InternalError {
internal_message: "rack subnet not IPv6".into(),
})
}
None => {
return Err(api::external::Error::InternalError {
internal_message: "rack subnet not set".into(),
})
}
}
}
}

impl From<Rack> for views::Rack {
Expand Down
10 changes: 10 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,16 @@ table! {
}
}

// Underlay subnet allocations for sleds. The primary key is the pair
// (`rack_id`, `sled_id`); `subnet_octet` is stored as a 2-byte integer.
table! {
sled_underlay_subnet_allocation (rack_id, sled_id) {
rack_id -> Uuid,
sled_id -> Uuid,
subnet_octet -> Int2,
hw_baseboard_id -> Uuid,
}
}
allow_tables_to_appear_in_same_query!(rack, sled_underlay_subnet_allocation);

table! {
switch (id) {
id -> Uuid,
Expand Down
16 changes: 16 additions & 0 deletions nexus/db-model/src/sled_underlay_subnet_allocation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use crate::schema::sled_underlay_subnet_allocation;
use uuid::Uuid;

/// Underlay allocation for a sled added to an initialized rack
///
/// Maps onto a row of the `sled_underlay_subnet_allocation` table; the field
/// order must match the column order declared in the schema.
#[derive(Queryable, Insertable, Debug, Clone, Selectable)]
#[diesel(table_name = sled_underlay_subnet_allocation)]
pub struct SledUnderlaySubnetAllocation {
/// ID of the rack the allocation belongs to
pub rack_id: Uuid,
/// ID of the sled the allocation was made for
pub sled_id: Uuid,
/// Octet allocated to the sled (stored as `Int2` in the database);
/// presumably selects the sled's subnet within the rack subnet -- confirm
/// against callers
pub subnet_octet: i16,
/// ID of the associated `hw_baseboard_id` row
pub hw_baseboard_id: Uuid,
}
20 changes: 20 additions & 0 deletions nexus/db-queries/src/db/datastore/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use nexus_db_model::InvServiceProcessor;
use nexus_db_model::SpType;
use nexus_db_model::SpTypeEnum;
use nexus_db_model::SwCaboose;
use nexus_types::inventory::BaseboardId;
use nexus_types::inventory::Collection;
use omicron_common::api::external::Error;
use omicron_common::api::external::InternalContext;
Expand Down Expand Up @@ -798,6 +799,25 @@ impl DataStore {
Ok(())
}

/// Find the primary key for `hw_baseboard_id` given a `BaseboardId`
///
/// Requires `Read` on the inventory. The lookup matches on both the serial
/// number and the part number of the baseboard; any database error is
/// converted to a public error.
pub async fn find_hw_baseboard_id(
    &self,
    opctx: &OpContext,
    baseboard_id: BaseboardId,
) -> Result<Uuid, Error> {
    use db::schema::hw_baseboard_id::dsl;

    opctx.authorize(authz::Action::Read, &authz::INVENTORY).await?;
    let conn = self.pool_connection_authorized(opctx).await?;

    let lookup = dsl::hw_baseboard_id
        .filter(dsl::serial_number.eq(baseboard_id.serial_number))
        .filter(dsl::part_number.eq(baseboard_id.part_number))
        .select(dsl::id)
        .limit(1);

    match lookup.first_async::<Uuid>(&*conn).await {
        Ok(id) => Ok(id),
        Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
    }
}

/// Attempt to read the latest collection while limiting queries to `limit`
/// records
pub async fn inventory_get_latest_collection(
Expand Down
58 changes: 58 additions & 0 deletions nexus/db-queries/src/db/datastore/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use nexus_db_model::InitialDnsGroup;
use nexus_db_model::PasswordHashString;
use nexus_db_model::SiloUser;
use nexus_db_model::SiloUserPasswordHash;
use nexus_db_model::SledUnderlaySubnetAllocation;
use nexus_types::external_api::params as external_params;
use nexus_types::external_api::shared;
use nexus_types::external_api::shared::IdentityType;
Expand Down Expand Up @@ -214,6 +215,63 @@ impl DataStore {
Ok(())
}

/// Return the subnet for the rack
///
/// Requires `Read` on the fleet. Only a rack row whose `rack_subnet` is
/// non-NULL is matched; any database error is converted to a public error.
pub async fn rack_subnet(
    &self,
    opctx: &OpContext,
    rack_id: Uuid,
) -> Result<IpNetwork, Error> {
    use db::schema::rack::dsl;

    opctx.authorize(authz::Action::Read, &authz::FLEET).await?;
    let conn = self.pool_connection_authorized(opctx).await?;

    let maybe_subnet = dsl::rack
        .filter(dsl::id.eq(rack_id))
        .filter(dsl::rack_subnet.is_not_null())
        .select(dsl::rack_subnet)
        .limit(1)
        .first_async::<Option<IpNetwork>>(&*conn)
        .await
        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

    // Unwrapping is safe: the query filters on
    // `rack_subnet.is_not_null()`, so a returned row always has a value.
    Ok(maybe_subnet.unwrap())
}

/// Return all current underlay allocations for the rack.
///
/// Allocations are returned in ascending `subnet_octet` order. Requires
/// `Read` on the fleet.
pub async fn rack_subnet_allocations(
    &self,
    opctx: &OpContext,
    rack_id: Uuid,
) -> Result<Vec<SledUnderlaySubnetAllocation>, Error> {
    use db::schema::sled_underlay_subnet_allocation::dsl as subnet_dsl;

    opctx.authorize(authz::Action::Read, &authz::FLEET).await?;
    let conn = self.pool_connection_authorized(opctx).await?;

    let by_octet = subnet_dsl::sled_underlay_subnet_allocation
        .filter(subnet_dsl::rack_id.eq(rack_id))
        .select(SledUnderlaySubnetAllocation::as_select())
        .order_by(subnet_dsl::subnet_octet.asc());

    by_octet
        .load_async(&*conn)
        .await
        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Store a new sled subnet allocation in the database
///
/// Requires `Modify` on the fleet. Any database error is converted to a
/// public error.
pub async fn sled_subnet_allocation_insert(
    &self,
    opctx: &OpContext,
    allocation: &SledUnderlaySubnetAllocation,
) -> Result<(), Error> {
    use db::schema::sled_underlay_subnet_allocation::dsl;

    opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
    let conn = self.pool_connection_authorized(opctx).await?;

    // The number of affected rows is not interesting to callers.
    diesel::insert_into(dsl::sled_underlay_subnet_allocation)
        .values(allocation.clone())
        .execute_async(&*conn)
        .await
        .map(|_rows| ())
        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

// The following methods which return a `TxnError` take a `conn` parameter
// which comes from the transaction created in `rack_set_initialized`.

Expand Down
164 changes: 148 additions & 16 deletions nexus/src/app/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ use crate::external_api::params::CertificateCreate;
use crate::external_api::shared::ServiceUsingCertificate;
use crate::internal_api::params::RackInitializationRequest;
use gateway_client::types::SpType;
use ipnetwork::IpNetwork;
use ipnetwork::{IpNetwork, Ipv6Network};
use nexus_db_model::DnsGroup;
use nexus_db_model::InitialDnsGroup;
use nexus_db_model::SledUnderlaySubnetAllocation;
use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed};
use nexus_db_queries::authz;
use nexus_db_queries::context::OpContext;
Expand All @@ -36,6 +37,7 @@ use nexus_types::external_api::views;
use nexus_types::external_api::views::Baseboard;
use nexus_types::external_api::views::UninitializedSled;
use nexus_types::internal_api::params::DnsRecord;
use omicron_common::address::{get_64_subnet, Ipv6Subnet, RACK_PREFIX};
use omicron_common::api::external::AddressLotKind;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
Expand All @@ -45,7 +47,11 @@ use omicron_common::api::external::LookupResult;
use omicron_common::api::external::Name;
use omicron_common::api::external::NameOrId;
use omicron_common::api::internal::shared::ExternalPortDiscovery;
use omicron_common::bail_unless;
use sled_agent_client::types::AddSledRequest;
use sled_agent_client::types::EarlyNetworkConfigBody;
use sled_agent_client::types::StartSledAgentRequest;
use sled_agent_client::types::StartSledAgentRequestBody;
use sled_agent_client::types::{
BgpConfig, BgpPeerConfig, EarlyNetworkConfig, PortConfigV1,
RackNetworkConfigV1, RouteConfig as SledRouteConfig,
Expand Down Expand Up @@ -584,20 +590,7 @@ impl super::Nexus {
if rack.rack_subnet.is_some() {
return Ok(());
}
let addr = self
.sled_list(opctx, &DataPageParams::max_page())
.await?
.get(0)
.ok_or(Error::InternalError {
internal_message: "no sleds at time of bootstore sync".into(),
})?
.address();

let sa = sled_agent_client::Client::new(
&format!("http://{}", addr),
self.log.clone(),
);

let sa = self.get_any_sled_agent(opctx).await?;
let result = sa
.read_network_bootstore_config_cache()
.await
Expand All @@ -619,7 +612,7 @@ impl super::Nexus {
opctx: &OpContext,
) -> Result<EarlyNetworkConfig, Error> {
let rack = self.rack_lookup(opctx, &self.rack_id).await?;
let subnet = rack.subnet()?;
let subnet = rack_subnet(rack.rack_subnet)?;

let db_ports = self.active_port_settings(opctx).await?;
let mut ports = Vec::new();
Expand Down Expand Up @@ -767,4 +760,143 @@ impl super::Nexus {
uninitialized_sleds.retain(|s| !sled_baseboards.contains(&s.baseboard));
Ok(uninitialized_sleds)
}

/// Add a sled to an initialized rack
///
/// Steps:
/// 1. Look up the `hw_baseboard_id` row for the sled's baseboard.
/// 2. Reuse the sled's existing underlay subnet allocation if one exists;
///    otherwise pick the lowest free octet (starting at
///    `MIN_SUBNET_OCTET`) and persist a new allocation.
/// 3. Ask a sled-agent to add the sled to the rack.
///
/// # Errors
///
/// Returns an error if a datastore operation fails, if no subnet octet is
/// free, if the rack subnet is not an IPv6 network, if the allocated octet
/// does not fit in a `u8`, or if the sled-agent request fails.
pub(crate) async fn add_sled_to_initialized_rack(
    &self,
    opctx: &OpContext,
    sled: UninitializedSled,
) -> Result<(), Error> {
    let baseboard_id = sled.baseboard.clone().into();
    let hw_baseboard_id =
        self.db_datastore.find_hw_baseboard_id(opctx, baseboard_id).await?;

    // Fetch all the existing allocations for the rack the sled belongs to
    let allocations = self
        .db_datastore
        .rack_subnet_allocations(opctx, sled.rack_id)
        .await?;

    // Calculate the allocation for the new sled by choosing the minimum
    // octet. The returned allocations are ordered by octet, so we will know
    // when we have a free one. However, if we already have an allocation
    // for the given sled then reuse that one.
    // TODO: This could all actually be done in SQL using a `next_item` query.
    // See https://github.com/oxidecomputer/omicron/issues/4544
    const MIN_SUBNET_OCTET: i16 = 33;
    let mut new_allocation = SledUnderlaySubnetAllocation {
        rack_id: sled.rack_id,
        sled_id: Uuid::new_v4(),
        subnet_octet: MIN_SUBNET_OCTET,
        hw_baseboard_id,
    };
    let subnet = self.db_datastore.rack_subnet(opctx, sled.rack_id).await?;
    let mut allocation_already_exists = false;
    for allocation in allocations {
        if allocation.hw_baseboard_id == new_allocation.hw_baseboard_id {
            // We already have an allocation for this sled.
            new_allocation = allocation;
            allocation_already_exists = true;
            break;
        }
        if allocation.subnet_octet == new_allocation.subnet_octet {
            bail_unless!(
                new_allocation.subnet_octet < 255,
                "Too many sled subnets allocated"
            );
            new_allocation.subnet_octet += 1;
        }
    }
    let rack_subnet =
        Ipv6Subnet::<RACK_PREFIX>::from(rack_subnet(Some(subnet))?);

    // A freshly computed octet is always within 33..=255, but an octet
    // reused from an existing database row is not validated here. Report an
    // out-of-range value as an error rather than panicking.
    let subnet_octet =
        u8::try_from(new_allocation.subnet_octet).map_err(|_| {
            Error::InternalError {
                internal_message: format!(
                    "invalid subnet octet {} allocated for sled {}",
                    new_allocation.subnet_octet, new_allocation.sled_id
                ),
            }
        })?;

    // Write the new allocation row to CRDB. The UNIQUE constraint
    // on `subnet_octet` will prevent dueling administrators reusing
    // allocations when sleds are being added. We will need another
    // mechanism ala generation numbers when we must interleave additions
    // and removals of sleds.
    if !allocation_already_exists {
        self.db_datastore
            .sled_subnet_allocation_insert(opctx, &new_allocation)
            .await?;
    }

    // Convert the baseboard as necessary. The model is the part number,
    // not the serial number.
    let baseboard = sled_agent_client::types::Baseboard::Gimlet {
        identifier: sled.baseboard.serial.clone(),
        model: sled.baseboard.part.clone(),
        revision: sled.baseboard.revision,
    };

    // Make the call to sled-agent
    let req = AddSledRequest {
        sled_id: baseboard,
        start_request: StartSledAgentRequest {
            generation: 0,
            schema_version: 1,
            body: StartSledAgentRequestBody {
                id: new_allocation.sled_id,
                rack_id: new_allocation.rack_id,
                use_trust_quorum: true,
                is_lrtq_learner: true,
                subnet: sled_agent_client::types::Ipv6Subnet {
                    net: get_64_subnet(rack_subnet, subnet_octet)
                        .net()
                        .into(),
                },
            },
        },
    };
    let sa = self.get_any_sled_agent(opctx).await?;
    sa.add_sled_to_initialized_rack(&req).await.map_err(|e| {
        Error::InternalError {
            internal_message: format!(
                "failed to add sled with baseboard {:?} to rack {}: {e}",
                sled.baseboard, new_allocation.rack_id
            ),
        }
    })?;

    Ok(())
}

async fn get_any_sled_agent(
&self,
opctx: &OpContext,
) -> Result<sled_agent_client::Client, Error> {
let addr = self
.sled_list(opctx, &DataPageParams::max_page())
.await?
.get(0)
.ok_or(Error::InternalError {
internal_message: "no sleds at time of bootstore sync".into(),
})?
.address();

Ok(sled_agent_client::Client::new(
&format!("http://{}", addr),
self.log.clone(),
))
}
}

pub fn rack_subnet(
rack_subnet: Option<IpNetwork>,
) -> Result<Ipv6Network, Error> {
match rack_subnet {
Some(IpNetwork::V6(subnet)) => Ok(subnet),
Some(IpNetwork::V4(_)) => {
return Err(Error::InternalError {
internal_message: "rack subnet not IPv6".into(),
})
}
None => {
return Err(Error::InternalError {
internal_message: "rack subnet not set".into(),
})
}
}
}
Loading

0 comments on commit 81f28dc

Please sign in to comment.