From 81164dc60e5df3f03e94f48b01e4df3b8f7033e5 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 7 Feb 2024 15:10:48 -0500 Subject: [PATCH] [RSS] fix split-brain rack subnet Prior to this commit, RSS accepted a set of parameters that included a `rack_subnet` and an optional (but not really) `rack_network_config`; the `rack_network_config` _also_ contained a `rack_subnet` property. The first `rack_subnet` was used by RSS to pick sled addresses, but the second is what it handed off to Nexus to record in the database. This PR makes three changes to parameters (the bulk of the diff is the expected fallout from them): * Removes the top-level `rack_subnet` field; only the `rack_network_config.rack_subnet` remains. * Makes `rack_network_config` non-optional. The handoff to Nexus would fail if this was `None`, so now it's always required. (This was only a little annoying in a few tests where we now have to cons up a fake network config.) * Changes wicket/wicketd to accept a subset of `rack_network_config` that does _not_ include a `rack_subnet`; this is a value the control plane should choose on its own. One potentially-dangerous change is that the RSS parameters changed are not just passed when RSS is run; they're also serialized to disk as `rss-sled-plan.json`. We have a test to ensure changes don't affect the schema of this plan, but I believe the changes here are backwards compatible (an old plan that has a no-longer-present `rack_subnet` is fine, and the JSON representation of the optional `RackNetworkConfig` is that it can be either `null` or an object; we'll fail to read any plans with `null`, but those would have failed to hand off to Nexus anyway as noted above). To check this is right, I pulled the `rss-sled-plan.json` off of madrid, censored the certs, replaced the password hash with one of our test hashes, and added a test that we can still read it. 
--- Changes that might make sense but I didn't attempt: * Changing the `rack_network_config` in the early networking bootstore to be non-optional. I think this would be correct, but is probably more trouble than it's worth to migrate. We might consider this the next time we make other, unrelated changes here though. * Removing the `rack_subnet` field not just from user -> wicket, but also from {wicket,developer} -> RSS. We could make RSS pick its own rack subnet, maybe? This seemed dubious enough I stopped. This does mean the TOML files used to automatically launch RSS still have a `rack_subnet` value, but now it's only one (under the rack network config) instead of two. * Changing the rack subnet choice to be random. wicket continues to use the hardcoded value we've been using. --- I also fixed a handful of places where we define the rack subnet `fd00:1122:3344:01::/56`; I believe this is just wrong / a typo. The `:01` at the end is equivalent to `:0001`, which is equivalent to the /56 `fd00:1122:3344:0000::/56`. Every place we had this we meant to use `fd00:1122:3344:0100::/56`, so I changed all of them (I think!). Fixes #5009, but only for any racks that run RSS after this change. I am not attempting to retroactively correct any racks that had the wrong `rack_subnet` recorded in the database, as I believe all such deployed racks are dev systems that are frequently wiped and reinstalled. 
--- clients/wicketd-client/src/lib.rs | 1 + docs/how-to-run.adoc | 2 +- nexus/src/app/rack.rs | 501 +++++++++--------- nexus/src/lib.rs | 8 +- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/integration_tests/rack.rs | 2 +- nexus/types/src/internal_api/params.rs | 2 +- openapi/bootstrap-agent.json | 7 +- openapi/nexus-internal.json | 2 +- openapi/wicketd.json | 76 ++- schema/rss-sled-plan.json | 11 +- sled-agent/src/bootstrap/params.rs | 25 +- sled-agent/src/rack_setup/config.rs | 16 +- sled-agent/src/rack_setup/plan/service.rs | 14 +- sled-agent/src/rack_setup/plan/sled.rs | 28 +- sled-agent/src/rack_setup/service.rs | 105 ++-- sled-agent/src/sim/server.rs | 10 +- .../madrid-rss-sled-plan.json | 1 + .../madrid-rss-sled-plan.json | 164 ++++++ .../gimlet-standalone/config-rss.toml | 17 +- smf/sled-agent/non-gimlet/config-rss.toml | 17 +- wicket-common/src/rack_setup.rs | 16 +- .../src/cli/rack_setup/config_template.toml | 1 - wicket/src/cli/rack_setup/config_toml.rs | 13 +- wicketd/src/http_entrypoints.rs | 12 +- wicketd/src/preflight_check.rs | 6 +- wicketd/src/preflight_check/uplink.rs | 4 +- wicketd/src/rss_config.rs | 15 +- 28 files changed, 629 insertions(+), 449 deletions(-) create mode 100644 sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json create mode 100644 sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json diff --git a/clients/wicketd-client/src/lib.rs b/clients/wicketd-client/src/lib.rs index 01c3b04f87..09f9ca1418 100644 --- a/clients/wicketd-client/src/lib.rs +++ b/clients/wicketd-client/src/lib.rs @@ -51,6 +51,7 @@ progenitor::generate_api!( CurrentRssUserConfigInsensitive = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] }, CurrentRssUserConfigSensitive = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] }, CurrentRssUserConfig = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] }, + UserSpecifiedRackNetworkConfig = { derives = [ PartialEq, Eq, PartialOrd, 
Ord, Serialize, Deserialize ] }, GetLocationResponse = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] }, }, replace = { diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 6a0b8f79d5..e286fe3730 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -277,7 +277,7 @@ The below example demonstrates a single static gateway route; in-depth explanati [rack_network_config] # An internal-only IPv6 address block which contains AZ-wide services. # This does not need to be changed. -rack_subnet = "fd00:1122:3344:01::/56" +rack_subnet = "fd00:1122:3344:0100::/56" # A range of IP addresses used by Boundary Services on the network. In a real # system, these would be addresses of the uplink ports on the Sidecar. With # softnpu, only one address is used. diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 2b38c62b23..a4d559f823 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -212,11 +212,7 @@ impl super::Nexus { mapped_fleet_roles, }; - let rack_network_config = request.rack_network_config.as_ref().ok_or( - Error::invalid_request( - "cannot initialize a rack without a network config", - ), - )?; + let rack_network_config = &request.rack_network_config; self.db_datastore .rack_set_initialized( @@ -336,289 +332,278 @@ impl super::Nexus { // Currently calling some of the apis directly, but should we be using sagas // going forward via self.run_saga()? Note that self.create_runnable_saga and // self.execute_saga are currently not available within this scope. 
- info!(self.log, "Checking for Rack Network Configuration"); - if let Some(rack_network_config) = &request.rack_network_config { - info!(self.log, "Recording Rack Network Configuration"); - let address_lot_name = - Name::from_str("initial-infra").map_err(|e| { - Error::internal_error(&format!( - "unable to use `initial-infra` as `Name`: {e}" - )) - })?; - let identity = IdentityMetadataCreateParams { - name: address_lot_name.clone(), - description: "initial infrastructure ip address lot" - .to_string(), - }; + info!(self.log, "Recording Rack Network Configuration"); + let address_lot_name = + Name::from_str("initial-infra").map_err(|e| { + Error::internal_error(&format!( + "unable to use `initial-infra` as `Name`: {e}" + )) + })?; + let identity = IdentityMetadataCreateParams { + name: address_lot_name.clone(), + description: "initial infrastructure ip address lot".to_string(), + }; - let kind = AddressLotKind::Infra; + let kind = AddressLotKind::Infra; - let first_address = IpAddr::V4(rack_network_config.infra_ip_first); - let last_address = IpAddr::V4(rack_network_config.infra_ip_last); - let ipv4_block = - AddressLotBlockCreate { first_address, last_address }; + let first_address = IpAddr::V4(rack_network_config.infra_ip_first); + let last_address = IpAddr::V4(rack_network_config.infra_ip_last); + let ipv4_block = AddressLotBlockCreate { first_address, last_address }; - let blocks = vec![ipv4_block]; + let blocks = vec![ipv4_block]; - let address_lot_params = - AddressLotCreate { identity, kind, blocks }; + let address_lot_params = AddressLotCreate { identity, kind, blocks }; - match self - .db_datastore - .address_lot_create(opctx, &address_lot_params) - .await - { - Ok(_) => Ok(()), - Err(e) => match e { - Error::ObjectAlreadyExists { - type_name: _, - object_name: _, - } => Ok(()), - _ => Err(e), - }, - }?; + match self + .db_datastore + .address_lot_create(opctx, &address_lot_params) + .await + { + Ok(_) => Ok(()), + Err(e) => match e { + 
Error::ObjectAlreadyExists { type_name: _, object_name: _ } => { + Ok(()) + } + _ => Err(e), + }, + }?; - let mut bgp_configs = HashMap::new(); + let mut bgp_configs = HashMap::new(); - for bgp_config in &rack_network_config.bgp { - bgp_configs.insert(bgp_config.asn, bgp_config.clone()); + for bgp_config in &rack_network_config.bgp { + bgp_configs.insert(bgp_config.asn, bgp_config.clone()); - let bgp_config_name: Name = - format!("as{}", bgp_config.asn).parse().unwrap(); + let bgp_config_name: Name = + format!("as{}", bgp_config.asn).parse().unwrap(); - let announce_set_name: Name = - format!("as{}-announce", bgp_config.asn).parse().unwrap(); + let announce_set_name: Name = + format!("as{}-announce", bgp_config.asn).parse().unwrap(); - let address_lot_name: Name = - format!("as{}-lot", bgp_config.asn).parse().unwrap(); + let address_lot_name: Name = + format!("as{}-lot", bgp_config.asn).parse().unwrap(); - self.db_datastore - .address_lot_create( - &opctx, - &AddressLotCreate { - identity: IdentityMetadataCreateParams { - name: address_lot_name, - description: format!( - "Address lot for announce set in as {}", - bgp_config.asn - ), - }, - kind: AddressLotKind::Infra, - blocks: bgp_config - .originate - .iter() - .map(|o| AddressLotBlockCreate { - first_address: o.network().into(), - last_address: o.broadcast().into(), - }) - .collect(), + self.db_datastore + .address_lot_create( + &opctx, + &AddressLotCreate { + identity: IdentityMetadataCreateParams { + name: address_lot_name, + description: format!( + "Address lot for announce set in as {}", + bgp_config.asn + ), }, - ) - .await - .map_err(|e| { - Error::internal_error(&format!( - "unable to create address lot for BGP as {}: {}", - bgp_config.asn, e - )) - })?; - - self.db_datastore - .bgp_create_announce_set( - &opctx, - &BgpAnnounceSetCreate { - identity: IdentityMetadataCreateParams { - name: announce_set_name.clone(), - description: format!( - "Announce set for AS {}", - bgp_config.asn - ), - }, - 
announcement: bgp_config - .originate - .iter() - .map(|x| BgpAnnouncementCreate { - address_lot_block: NameOrId::Name( - format!("as{}", bgp_config.asn) - .parse() - .unwrap(), - ), - network: IpNetwork::from(*x).into(), - }) - .collect(), + kind: AddressLotKind::Infra, + blocks: bgp_config + .originate + .iter() + .map(|o| AddressLotBlockCreate { + first_address: o.network().into(), + last_address: o.broadcast().into(), + }) + .collect(), + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to create address lot for BGP as {}: {}", + bgp_config.asn, e + )) + })?; + + self.db_datastore + .bgp_create_announce_set( + &opctx, + &BgpAnnounceSetCreate { + identity: IdentityMetadataCreateParams { + name: announce_set_name.clone(), + description: format!( + "Announce set for AS {}", + bgp_config.asn + ), }, - ) - .await - .map_err(|e| { - Error::internal_error(&format!( - "unable to create bgp announce set for as {}: {}", - bgp_config.asn, e - )) - })?; - - self.db_datastore - .bgp_config_set( - &opctx, - &BgpConfigCreate { - identity: IdentityMetadataCreateParams { - name: bgp_config_name, - description: format!( - "BGP config for AS {}", - bgp_config.asn + announcement: bgp_config + .originate + .iter() + .map(|x| BgpAnnouncementCreate { + address_lot_block: NameOrId::Name( + format!("as{}", bgp_config.asn) + .parse() + .unwrap(), ), - }, - asn: bgp_config.asn, - bgp_announce_set_id: announce_set_name.into(), - vrf: None, - }, - ) - .await - .map_err(|e| { - Error::internal_error(&format!( - "unable to set bgp config for as {}: {}", - bgp_config.asn, e - )) - })?; - } + network: IpNetwork::from(*x).into(), + }) + .collect(), + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to create bgp announce set for as {}: {}", + bgp_config.asn, e + )) + })?; - for (idx, uplink_config) in - rack_network_config.ports.iter().enumerate() - { - let switch = uplink_config.switch.to_string(); - let switch_location = 
Name::from_str(&switch).map_err(|e| { + self.db_datastore + .bgp_config_set( + &opctx, + &BgpConfigCreate { + identity: IdentityMetadataCreateParams { + name: bgp_config_name, + description: format!( + "BGP config for AS {}", + bgp_config.asn + ), + }, + asn: bgp_config.asn, + bgp_announce_set_id: announce_set_name.into(), + vrf: None, + }, + ) + .await + .map_err(|e| { Error::internal_error(&format!( - "unable to use {switch} as Name: {e}" + "unable to set bgp config for as {}: {}", + bgp_config.asn, e )) })?; + } - let uplink_name = format!("default-uplink{idx}"); - let name = Name::from_str(&uplink_name).unwrap(); + for (idx, uplink_config) in rack_network_config.ports.iter().enumerate() + { + let switch = uplink_config.switch.to_string(); + let switch_location = Name::from_str(&switch).map_err(|e| { + Error::internal_error(&format!( + "unable to use {switch} as Name: {e}" + )) + })?; - let identity = IdentityMetadataCreateParams { - name: name.clone(), - description: "initial uplink configuration".to_string(), - }; + let uplink_name = format!("default-uplink{idx}"); + let name = Name::from_str(&uplink_name).unwrap(); - let port_config = SwitchPortConfigCreate { - geometry: nexus_types::external_api::params::SwitchPortGeometry::Qsfp28x1, - }; + let identity = IdentityMetadataCreateParams { + name: name.clone(), + description: "initial uplink configuration".to_string(), + }; - let mut port_settings_params = SwitchPortSettingsCreate { - identity, - port_config, - groups: vec![], - links: HashMap::new(), - interfaces: HashMap::new(), - routes: HashMap::new(), - bgp_peers: HashMap::new(), - addresses: HashMap::new(), + let port_config = SwitchPortConfigCreate { + geometry: nexus_types::external_api::params::SwitchPortGeometry::Qsfp28x1, }; - let addresses: Vec
= uplink_config - .addresses - .iter() - .map(|a| Address { - address_lot: NameOrId::Name(address_lot_name.clone()), - address: (*a).into(), - }) - .collect(); - - port_settings_params - .addresses - .insert("phy0".to_string(), AddressConfig { addresses }); - - let routes: Vec = uplink_config - .routes - .iter() - .map(|r| Route { - dst: r.destination.into(), - gw: r.nexthop, - vid: None, - }) - .collect(); - - port_settings_params - .routes - .insert("phy0".to_string(), RouteConfig { routes }); - - let peers: Vec = uplink_config - .bgp_peers - .iter() - .map(|r| BgpPeer { - bgp_announce_set: NameOrId::Name( - format!("as{}-announce", r.asn).parse().unwrap(), - ), - bgp_config: NameOrId::Name( - format!("as{}", r.asn).parse().unwrap(), - ), - interface_name: "phy0".into(), - addr: r.addr.into(), - hold_time: r.hold_time.unwrap_or(6) as u32, - idle_hold_time: r.idle_hold_time.unwrap_or(3) as u32, - delay_open: r.delay_open.unwrap_or(0) as u32, - connect_retry: r.connect_retry.unwrap_or(3) as u32, - keepalive: r.keepalive.unwrap_or(2) as u32, - }) - .collect(); + let mut port_settings_params = SwitchPortSettingsCreate { + identity, + port_config, + groups: vec![], + links: HashMap::new(), + interfaces: HashMap::new(), + routes: HashMap::new(), + bgp_peers: HashMap::new(), + addresses: HashMap::new(), + }; - port_settings_params - .bgp_peers - .insert("phy0".to_string(), BgpPeerConfig { peers }); + let addresses: Vec
= uplink_config + .addresses + .iter() + .map(|a| Address { + address_lot: NameOrId::Name(address_lot_name.clone()), + address: (*a).into(), + }) + .collect(); + + port_settings_params + .addresses + .insert("phy0".to_string(), AddressConfig { addresses }); + + let routes: Vec = uplink_config + .routes + .iter() + .map(|r| Route { + dst: r.destination.into(), + gw: r.nexthop, + vid: None, + }) + .collect(); + + port_settings_params + .routes + .insert("phy0".to_string(), RouteConfig { routes }); + + let peers: Vec = uplink_config + .bgp_peers + .iter() + .map(|r| BgpPeer { + bgp_announce_set: NameOrId::Name( + format!("as{}-announce", r.asn).parse().unwrap(), + ), + bgp_config: NameOrId::Name( + format!("as{}", r.asn).parse().unwrap(), + ), + interface_name: "phy0".into(), + addr: r.addr.into(), + hold_time: r.hold_time.unwrap_or(6) as u32, + idle_hold_time: r.idle_hold_time.unwrap_or(3) as u32, + delay_open: r.delay_open.unwrap_or(0) as u32, + connect_retry: r.connect_retry.unwrap_or(3) as u32, + keepalive: r.keepalive.unwrap_or(2) as u32, + }) + .collect(); + + port_settings_params + .bgp_peers + .insert("phy0".to_string(), BgpPeerConfig { peers }); + + let link = LinkConfigCreate { + mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 + lldp: LldpServiceConfigCreate { + enabled: false, + lldp_config: None, + }, + fec: uplink_config.uplink_port_fec.into(), + speed: uplink_config.uplink_port_speed.into(), + autoneg: uplink_config.autoneg, + }; - let link = LinkConfigCreate { - mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 - lldp: LldpServiceConfigCreate { - enabled: false, - lldp_config: None, - }, - fec: uplink_config.uplink_port_fec.into(), - speed: uplink_config.uplink_port_speed.into(), - autoneg: uplink_config.autoneg, - }; + port_settings_params.links.insert("phy".to_string(), link); - port_settings_params.links.insert("phy".to_string(), link); + match self + .db_datastore + .switch_port_settings_create(opctx, 
&port_settings_params, None) + .await + { + Ok(_) | Err(Error::ObjectAlreadyExists { .. }) => Ok(()), + Err(e) => Err(e), + }?; - match self - .db_datastore - .switch_port_settings_create( - opctx, - &port_settings_params, - None, - ) - .await - { - Ok(_) | Err(Error::ObjectAlreadyExists { .. }) => Ok(()), - Err(e) => Err(e), - }?; - - let port_settings_id = self - .db_datastore - .switch_port_settings_get_id( - opctx, - nexus_db_model::Name(name.clone()), - ) - .await?; + let port_settings_id = self + .db_datastore + .switch_port_settings_get_id( + opctx, + nexus_db_model::Name(name.clone()), + ) + .await?; - let switch_port_id = self - .db_datastore - .switch_port_get_id( - opctx, - rack_id, - switch_location.into(), - Name::from_str(&uplink_config.port).unwrap().into(), - ) - .await?; + let switch_port_id = self + .db_datastore + .switch_port_get_id( + opctx, + rack_id, + switch_location.into(), + Name::from_str(&uplink_config.port).unwrap().into(), + ) + .await?; + + self.db_datastore + .switch_port_set_settings_id( + opctx, + switch_port_id, + Some(port_settings_id), + db::datastore::UpdatePrecondition::Null, + ) + .await?; + } // TODO - https://github.com/oxidecomputer/omicron/issues/3277 + // record port speed - self.db_datastore - .switch_port_set_settings_id( - opctx, - switch_port_id, - Some(port_settings_id), - db::datastore::UpdatePrecondition::Null, - ) - .await?; - } // TODO - https://github.com/oxidecomputer/omicron/issues/3277 - // record port speed - }; self.initial_bootstore_sync(&opctx).await?; Ok(()) diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index e1392440a1..cb08bfcdc0 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -288,13 +288,15 @@ impl nexus_test_interface::NexusServer for Server { vec!["qsfp0".parse().unwrap()], )]), ), - rack_network_config: Some(RackNetworkConfig { - rack_subnet: "fd00:1122:3344:01::/56".parse().unwrap(), + rack_network_config: RackNetworkConfig { + rack_subnet: "fd00:1122:3344:0100::/56" + .parse() + 
.unwrap(), infra_ip_first: Ipv4Addr::UNSPECIFIED, infra_ip_last: Ipv4Addr::UNSPECIFIED, ports: Vec::new(), bgp: Vec::new(), - }), + }, }, ) .await diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index da21602cb1..7baacf97ce 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -65,7 +65,7 @@ pub const RACK_UUID: &str = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc"; pub const SWITCH_UUID: &str = "dae4e1f1-410e-4314-bff1-fec0504be07e"; pub const OXIMETER_UUID: &str = "39e6175b-4df2-4730-b11d-cbc1e60a2e78"; pub const PRODUCER_UUID: &str = "a6458b7d-87c3-4483-be96-854d814c20de"; -pub const RACK_SUBNET: &str = "fd00:1122:3344:01::/56"; +pub const RACK_SUBNET: &str = "fd00:1122:3344:0100::/56"; /// Password for the user created by the test suite /// diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index a6fc93e92a..a58871ee71 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -110,7 +110,7 @@ async fn test_sled_list_uninitialized(cptestctx: &ControlPlaneTestContext) { let baseboard = uninitialized_sleds.pop().unwrap().baseboard; let sled_uuid = Uuid::new_v4(); let sa = SledAgentStartupInfo { - sa_address: "[fd00:1122:3344:01::1]:8080".parse().unwrap(), + sa_address: "[fd00:1122:3344:0100::1]:8080".parse().unwrap(), role: SledRole::Gimlet, baseboard: Baseboard { serial_number: baseboard.serial, diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index bc25e8d4bd..ab15ec26b7 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -263,7 +263,7 @@ pub struct RackInitializationRequest { /// The external qsfp ports per sidecar pub external_port_count: ExternalPortDiscovery, /// Initial rack network configuration - pub rack_network_config: Option, + pub rack_network_config: RackNetworkConfig, } pub type DnsConfigParams = dns_service_client::types::DnsConfigParams; 
diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 6fd83cef47..a55803eda9 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -651,7 +651,6 @@ } }, "rack_network_config": { - "nullable": true, "description": "Initial rack network configuration", "allOf": [ { @@ -659,10 +658,6 @@ } ] }, - "rack_subnet": { - "type": "string", - "format": "ipv6" - }, "recovery_silo": { "description": "Configuration of the Recovery Silo (the initial Silo)", "allOf": [ @@ -688,7 +683,7 @@ "external_dns_zone_name", "internal_services_ip_pool_ranges", "ntp_servers", - "rack_subnet", + "rack_network_config", "recovery_silo" ] }, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index bc26736b37..4714b64c52 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -5618,7 +5618,6 @@ } }, "rack_network_config": { - "nullable": true, "description": "Initial rack network configuration", "allOf": [ { @@ -5649,6 +5648,7 @@ "external_port_count", "internal_dns_zone_config", "internal_services_ip_pool_ranges", + "rack_network_config", "recovery_silo", "services" ] diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 300e8412c3..b9645a174f 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1132,7 +1132,7 @@ "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/RackNetworkConfigV1" + "$ref": "#/components/schemas/UserSpecifiedRackNetworkConfig" } ] } @@ -2172,7 +2172,7 @@ } }, "rack_network_config": { - "$ref": "#/components/schemas/RackNetworkConfigV1" + "$ref": "#/components/schemas/UserSpecifiedRackNetworkConfig" } }, "required": [ @@ -2190,46 +2190,6 @@ "type": "string", "format": "uuid" }, - "RackNetworkConfigV1": { - "description": "Initial network configuration", - "type": "object", - "properties": { - "bgp": { - "description": "BGP configurations for connecting the rack to external networks", - "type": "array", - "items": { - "$ref": 
"#/components/schemas/BgpConfig" - } - }, - "infra_ip_first": { - "description": "First ip address to be used for configuring network infrastructure", - "type": "string", - "format": "ipv4" - }, - "infra_ip_last": { - "description": "Last ip address to be used for configuring network infrastructure", - "type": "string", - "format": "ipv4" - }, - "ports": { - "description": "Uplinks for connecting the rack to external networks", - "type": "array", - "items": { - "$ref": "#/components/schemas/PortConfigV1" - } - }, - "rack_subnet": { - "$ref": "#/components/schemas/Ipv6Network" - } - }, - "required": [ - "bgp", - "infra_ip_first", - "infra_ip_last", - "ports", - "rack_subnet" - ] - }, "RackOperationStatus": { "description": "Current status of any rack-level operation being performed by this bootstrap agent.\n\n
JSON schema\n\n```json { \"description\": \"Current status of any rack-level operation being performed by this bootstrap agent.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initializing\" ] } } }, { \"description\": \"`id` will be none if the rack was already initialized on startup.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackInitId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_panicked\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"resetting\" ] } } }, { \"description\": \"`reset_id` will be None if the rack is in an uninitialized-on-startup, or Some if it is in an uninitialized state due to a reset operation completing.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"reset_id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackResetId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"uninitialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": 
\"string\", \"enum\": [ \"reset_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_panicked\" ] } } } ] } ```
", "oneOf": [ @@ -4698,6 +4658,38 @@ } ] }, + "UserSpecifiedRackNetworkConfig": { + "description": "User-specified parts of [`RackNetworkConfig`](omicron_common::api::internal::shared::RackNetworkConfig).", + "type": "object", + "properties": { + "bgp": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpConfig" + } + }, + "infra_ip_first": { + "type": "string", + "format": "ipv4" + }, + "infra_ip_last": { + "type": "string", + "format": "ipv4" + }, + "ports": { + "type": "array", + "items": { + "$ref": "#/components/schemas/PortConfigV1" + } + } + }, + "required": [ + "bgp", + "infra_ip_first", + "infra_ip_last", + "ports" + ] + }, "IgnitionCommand": { "description": "Ignition command.\n\n
JSON schema\n\n```json { \"description\": \"Ignition command.\", \"type\": \"string\", \"enum\": [ \"power_on\", \"power_off\", \"power_reset\" ] } ```
", "type": "string", diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index cbd73ed066..f5ac5bd0ff 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -466,7 +466,7 @@ "external_dns_zone_name", "internal_services_ip_pool_ranges", "ntp_servers", - "rack_subnet", + "rack_network_config", "recovery_silo" ], "properties": { @@ -521,19 +521,12 @@ }, "rack_network_config": { "description": "Initial rack network configuration", - "anyOf": [ + "allOf": [ { "$ref": "#/definitions/RackNetworkConfigV1" - }, - { - "type": "null" } ] }, - "rack_subnet": { - "type": "string", - "format": "ipv6" - }, "recovery_silo": { "description": "Configuration of the Recovery Silo (the initial Silo)", "allOf": [ diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index b684d96763..48444af8d4 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize}; use sha3::{Digest, Sha3_256}; use sled_hardware::Baseboard; use std::borrow::Cow; -use std::collections::HashSet; +use std::collections::BTreeSet; use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; @@ -24,14 +24,13 @@ pub enum BootstrapAddressDiscovery { /// Ignore all bootstrap addresses except our own. OnlyOurs, /// Ignore all bootstrap addresses except the following. - OnlyThese { addrs: HashSet }, + OnlyThese { addrs: BTreeSet }, } // "Shadow" copy of `RackInitializeRequest` that does no validation on its // fields. 
#[derive(Clone, Deserialize)] struct UnvalidatedRackInitializeRequest { - rack_subnet: Ipv6Addr, trust_quorum_peers: Option>, bootstrap_discovery: BootstrapAddressDiscovery, ntp_servers: Vec, @@ -41,7 +40,7 @@ struct UnvalidatedRackInitializeRequest { external_dns_zone_name: String, external_certificates: Vec, recovery_silo: RecoverySiloConfig, - rack_network_config: Option, + rack_network_config: RackNetworkConfig, } /// Configuration for the "rack setup service". @@ -53,8 +52,6 @@ struct UnvalidatedRackInitializeRequest { #[derive(Clone, Deserialize, Serialize, PartialEq, JsonSchema)] #[serde(try_from = "UnvalidatedRackInitializeRequest")] pub struct RackInitializeRequest { - pub rack_subnet: Ipv6Addr, - /// The set of peer_ids required to initialize trust quorum /// /// The value is `None` if we are not using trust quorum @@ -89,7 +86,7 @@ pub struct RackInitializeRequest { pub recovery_silo: RecoverySiloConfig, /// Initial rack network configuration - pub rack_network_config: Option, + pub rack_network_config: RackNetworkConfig, } // This custom debug implementation hides the private keys. @@ -98,7 +95,6 @@ impl std::fmt::Debug for RackInitializeRequest { // If you find a compiler error here, and you just added a field to this // struct, be sure to add it to the Debug impl below! 
let RackInitializeRequest { - rack_subnet, trust_quorum_peers: trust_qurorum_peers, bootstrap_discovery, ntp_servers, @@ -112,7 +108,6 @@ impl std::fmt::Debug for RackInitializeRequest { } = &self; f.debug_struct("RackInitializeRequest") - .field("rack_subnet", rack_subnet) .field("trust_quorum_peers", trust_qurorum_peers) .field("bootstrap_discovery", bootstrap_discovery) .field("ntp_servers", ntp_servers) @@ -155,7 +150,6 @@ impl TryFrom for RackInitializeRequest { } Ok(RackInitializeRequest { - rack_subnet: value.rack_subnet, trust_quorum_peers: value.trust_quorum_peers, bootstrap_discovery: value.bootstrap_discovery, ntp_servers: value.ntp_servers, @@ -368,6 +362,7 @@ pub fn test_config() -> RackInitializeRequest { #[cfg(test)] mod tests { + use std::net::Ipv4Addr; use std::net::Ipv6Addr; use super::*; @@ -395,7 +390,6 @@ mod tests { #[test] fn parse_rack_initialization_weak_hash() { let config = r#" - rack_subnet = "fd00:1122:3344:0100::" bootstrap_discovery.type = "only_ours" ntp_servers = [ "ntp.eng.oxide.computer" ] dns_servers = [ "1.1.1.1", "9.9.9.9" ] @@ -480,7 +474,6 @@ mod tests { // Conjure up a config; we'll tweak the internal services pools and // external DNS IPs, but no other fields matter. 
let mut config = UnvalidatedRackInitializeRequest { - rack_subnet: Ipv6Addr::LOCALHOST, trust_quorum_peers: None, bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, ntp_servers: Vec::new(), @@ -494,7 +487,13 @@ mod tests { user_name: "recovery".parse().unwrap(), user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(), }, - rack_network_config: None, + rack_network_config: RackNetworkConfig { + rack_subnet: Ipv6Addr::LOCALHOST.into(), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + }, }; // Valid configs: all external DNS IPs are contained in the IP pool diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 33de7121d4..52bea295a5 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -70,12 +70,14 @@ impl SetupServiceConfig { } pub fn az_subnet(&self) -> Ipv6Subnet { - Ipv6Subnet::::new(self.rack_subnet) + Ipv6Subnet::::new(self.rack_network_config.rack_subnet.ip()) } /// Returns the subnet for our rack. pub fn rack_subnet(&self) -> Ipv6Subnet { - Ipv6Subnet::::new(self.rack_subnet) + Ipv6Subnet::::new( + self.rack_network_config.rack_subnet.ip(), + ) } /// Returns the subnet for the `index`-th sled in the rack. 
@@ -92,12 +94,12 @@ mod test { use anyhow::Context; use camino::Utf8PathBuf; use omicron_common::address::IpRange; + use omicron_common::api::internal::shared::RackNetworkConfig; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; #[test] fn test_subnets() { let cfg = SetupServiceConfig { - rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), trust_quorum_peers: None, bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, ntp_servers: vec![String::from("test.pool.example.com")], @@ -119,7 +121,13 @@ mod test { .parse() .unwrap(), }, - rack_network_config: None, + rack_network_config: RackNetworkConfig { + rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + }, }; assert_eq!( diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index bed82a7a01..220f0d686b 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -35,7 +35,7 @@ use sled_agent_client::{ use sled_storage::dataset::{DatasetKind, DatasetName, CONFIG_DATASET}; use sled_storage::manager::StorageHandle; use slog::Logger; -use std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::num::Wrapping; use thiserror::Error; @@ -708,7 +708,7 @@ impl Plan { log: &Logger, config: &Config, storage_manager: &StorageHandle, - sleds: &HashMap, + sleds: &BTreeMap, ) -> Result { // Load the information we need about each Sled to be able to allocate // components on it. 
@@ -1078,6 +1078,7 @@ mod tests { use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::RecoverySiloConfig; use omicron_common::address::IpRange; + use omicron_common::api::internal::shared::RackNetworkConfig; const EXPECTED_RESERVED_ADDRESSES: u16 = 2; const EXPECTED_USABLE_ADDRESSES: u16 = @@ -1149,7 +1150,6 @@ mod tests { "fd01::103", ]; let config = Config { - rack_subnet: Ipv6Addr::LOCALHOST, trust_quorum_peers: None, bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, ntp_servers: Vec::new(), @@ -1173,7 +1173,13 @@ mod tests { user_name: "recovery".parse().unwrap(), user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(), }, - rack_network_config: None, + rack_network_config: RackNetworkConfig { + rack_subnet: Ipv6Addr::LOCALHOST.into(), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + }, }; let mut svp = ServicePortBuilder::new(&config); diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 07f33893fc..efdd86d2f9 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -16,7 +16,7 @@ use serde::{Deserialize, Serialize}; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet}; use std::net::{Ipv6Addr, SocketAddrV6}; use thiserror::Error; use uuid::Uuid; @@ -46,7 +46,7 @@ const RSS_SLED_PLAN_FILENAME: &str = "rss-sled-plan.json"; #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct Plan { pub rack_id: Uuid, - pub sleds: HashMap, + pub sleds: BTreeMap, // Store the provided RSS configuration as part of the sled plan; if it // changes after reboot, we need to know. 
@@ -81,7 +81,7 @@ impl Plan { log: &Logger, config: &Config, storage_manager: &StorageHandle, - bootstrap_addrs: HashSet, + bootstrap_addrs: BTreeSet, use_trust_quorum: bool, ) -> Result { let rack_id = Uuid::new_v4(); @@ -117,7 +117,7 @@ impl Plan { info!(log, "Serializing plan"); - let mut sleds = std::collections::HashMap::new(); + let mut sleds = BTreeMap::new(); for (addr, allocation) in allocations { sleds.insert(addr, allocation); } @@ -152,4 +152,24 @@ mod tests { &serde_json::to_string_pretty(&schema).unwrap(), ); } + + #[test] + fn test_read_known_rss_sled_plans() { + let known_rss_sled_plans = &["madrid-rss-sled-plan.json"]; + + let path = Utf8PathBuf::from("tests/old-rss-sled-plans"); + let out_path = Utf8PathBuf::from("tests/output/new-rss-sled-plans"); + for sled_plan_basename in known_rss_sled_plans { + println!("checking {:?}", sled_plan_basename); + let contents = + std::fs::read_to_string(path.join(sled_plan_basename)) + .expect("failed to read file"); + let parsed: Plan = + serde_json::from_str(&contents).expect("failed to parse file"); + expectorate::assert_contents( + out_path.join(sled_plan_basename), + &serde_json::to_string_pretty(&parsed).unwrap(), + ); + } + } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index af81df52bb..2788e189cc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -601,58 +601,55 @@ impl ServiceInner { .map(Into::into) .collect(); - let rack_network_config = match &config.rack_network_config { - Some(config) => { - let value = NexusTypes::RackNetworkConfigV1 { - rack_subnet: config.rack_subnet, - infra_ip_first: config.infra_ip_first, - infra_ip_last: config.infra_ip_last, - ports: config - .ports - .iter() - .map(|config| NexusTypes::PortConfigV1 { - port: config.port.clone(), - routes: config - .routes - .iter() - .map(|r| NexusTypes::RouteConfig { - destination: r.destination, - nexthop: r.nexthop, - }) - .collect(), - 
addresses: config.addresses.clone(), - switch: config.switch.into(), - uplink_port_speed: config.uplink_port_speed.into(), - uplink_port_fec: config.uplink_port_fec.into(), - autoneg: config.autoneg, - bgp_peers: config - .bgp_peers - .iter() - .map(|b| NexusTypes::BgpPeerConfig { - addr: b.addr, - asn: b.asn, - port: b.port.clone(), - hold_time: b.hold_time, - connect_retry: b.connect_retry, - delay_open: b.delay_open, - idle_hold_time: b.idle_hold_time, - keepalive: b.keepalive, - }) - .collect(), - }) - .collect(), - bgp: config - .bgp - .iter() - .map(|config| NexusTypes::BgpConfig { - asn: config.asn, - originate: config.originate.clone(), - }) - .collect(), - }; - Some(value) + let rack_network_config = { + let config = &config.rack_network_config; + NexusTypes::RackNetworkConfigV1 { + rack_subnet: config.rack_subnet, + infra_ip_first: config.infra_ip_first, + infra_ip_last: config.infra_ip_last, + ports: config + .ports + .iter() + .map(|config| NexusTypes::PortConfigV1 { + port: config.port.clone(), + routes: config + .routes + .iter() + .map(|r| NexusTypes::RouteConfig { + destination: r.destination, + nexthop: r.nexthop, + }) + .collect(), + addresses: config.addresses.clone(), + switch: config.switch.into(), + uplink_port_speed: config.uplink_port_speed.into(), + uplink_port_fec: config.uplink_port_fec.into(), + autoneg: config.autoneg, + bgp_peers: config + .bgp_peers + .iter() + .map(|b| NexusTypes::BgpPeerConfig { + addr: b.addr, + asn: b.asn, + port: b.port.clone(), + hold_time: b.hold_time, + connect_retry: b.connect_retry, + delay_open: b.delay_open, + idle_hold_time: b.idle_hold_time, + keepalive: b.keepalive, + }) + .collect(), + }) + .collect(), + bgp: config + .bgp + .iter() + .map(|config| NexusTypes::BgpConfig { + asn: config.asn, + originate: config.originate.clone(), + }) + .collect(), } - None => None, }; info!(self.log, "rack_network_config: {:#?}", rack_network_config); @@ -868,14 +865,14 @@ impl ServiceInner { // - Enough peers to 
create a new plan (if one does not exist) let bootstrap_addrs = match &config.bootstrap_discovery { BootstrapAddressDiscovery::OnlyOurs => { - HashSet::from([local_bootstrap_agent.our_address()]) + BTreeSet::from([local_bootstrap_agent.our_address()]) } BootstrapAddressDiscovery::OnlyThese { addrs } => addrs.clone(), }; let maybe_sled_plan = SledPlan::load(&self.log, storage_manager).await?; if let Some(plan) = &maybe_sled_plan { - let stored_peers: HashSet = + let stored_peers: BTreeSet = plan.sleds.keys().map(|a| *a.ip()).collect(); if stored_peers != bootstrap_addrs { let e = concat!( @@ -931,7 +928,7 @@ impl ServiceInner { schema_version: 1, body: EarlyNetworkConfigBody { ntp_servers: config.ntp_servers.clone(), - rack_network_config: config.rack_network_config.clone(), + rack_network_config: Some(config.rack_network_config.clone()), }, }; info!(self.log, "Writing Rack Network Configuration to bootstore"); diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index b214667631..fd5995b8f1 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -26,6 +26,8 @@ use omicron_common::FileKv; use slog::{info, Drain, Logger}; use std::collections::HashMap; use std::net::IpAddr; +use std::net::Ipv4Addr; +use std::net::Ipv6Addr; use std::net::SocketAddr; use std::net::SocketAddrV6; use std::sync::Arc; @@ -455,7 +457,13 @@ pub async fn run_standalone_server( external_port_count: NexusTypes::ExternalPortDiscovery::Static( HashMap::new(), ), - rack_network_config: None, + rack_network_config: NexusTypes::RackNetworkConfigV1 { + rack_subnet: Ipv6Addr::LOCALHOST.into(), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + }, }; handoff_to_nexus(&log, &config, &rack_init_request).await?; diff --git a/sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json new file mode 100644 index 
0000000000..5512247ee8 --- /dev/null +++ b/sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json @@ -0,0 +1 @@ +{"rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","sleds":{"[fdb0:a840:2504:396::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"b3e78a88-0f2e-476e-a8a9-2d8c90a169d6","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:103::/64"}}},"[fdb0:a840:2504:157::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"168e1ad6-1e4b-4f7a-b894-157974bd8bb8","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:104::/64"}}},"[fdb0:a840:2504:355::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"b9877212-212b-4588-b818-9c7b53c5b143","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:102::/64"}}},"[fdb0:a840:2504:3d2::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"c3a0f8be-5b05-4ee8-8c4e-2514de6501b6","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:101::/64"}}}},"config":{"rack_subnet":"fd00:1122:3344:100::","trust_quorum_peers":[{"type":"gimlet","identifier":"BRM42220081","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM42220046","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM44220001","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM42220004","model":"913-0000019","revision":6}],"bootstrap_discovery":{"type":"only_these","addrs":["fdb0:a840:2504:3d2::1","fdb0:a840:2504:355::1","fdb0:a840:2504:396::1","fdb0:a840:2504:157::1"]},"ntp_servers":["ntp.eng.oxide.computer"],"dns_servers":["1.1.1.1","9.9.9.9"],"internal_services_ip_pool_ranges":[{"first":"172.20.28.1","last":"172.20.28.10"}],"external_dns_ips":["172.20.28.1"],"external_dns_zone_name":"madrid.eng.oxid
e.computer","external_certificates":[{"cert":"","key":""}],"recovery_silo":{"silo_name":"recovery","user_name":"recovery","user_password_hash":"$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY"},"rack_network_config":{"rack_subnet":"fd00:1122:3344:1::/56","infra_ip_first":"172.20.15.37","infra_ip_last":"172.20.15.38","ports":[{"routes":[{"destination":"0.0.0.0/0","nexthop":"172.20.15.33"}],"addresses":["172.20.15.38/29"],"switch":"switch0","port":"qsfp0","uplink_port_speed":"speed40_g","uplink_port_fec":"none","bgp_peers":[],"autoneg":false},{"routes":[{"destination":"0.0.0.0/0","nexthop":"172.20.15.33"}],"addresses":["172.20.15.37/29"],"switch":"switch1","port":"qsfp0","uplink_port_speed":"speed40_g","uplink_port_fec":"none","bgp_peers":[],"autoneg":false}],"bgp":[]}}} diff --git a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json new file mode 100644 index 0000000000..69f68c60ad --- /dev/null +++ b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json @@ -0,0 +1,164 @@ +{ + "rack_id": "ed6bcf59-9620-491d-8ebd-4a4eebf2e136", + "sleds": { + "[fdb0:a840:2504:157::1]:12346": { + "generation": 0, + "schema_version": 1, + "body": { + "id": "168e1ad6-1e4b-4f7a-b894-157974bd8bb8", + "rack_id": "ed6bcf59-9620-491d-8ebd-4a4eebf2e136", + "use_trust_quorum": true, + "is_lrtq_learner": false, + "subnet": { + "net": "fd00:1122:3344:104::/64" + } + } + }, + "[fdb0:a840:2504:355::1]:12346": { + "generation": 0, + "schema_version": 1, + "body": { + "id": "b9877212-212b-4588-b818-9c7b53c5b143", + "rack_id": "ed6bcf59-9620-491d-8ebd-4a4eebf2e136", + "use_trust_quorum": true, + "is_lrtq_learner": false, + "subnet": { + "net": "fd00:1122:3344:102::/64" + } + } + }, + "[fdb0:a840:2504:396::1]:12346": { + "generation": 0, + "schema_version": 1, + "body": { + "id": "b3e78a88-0f2e-476e-a8a9-2d8c90a169d6", + "rack_id": 
"ed6bcf59-9620-491d-8ebd-4a4eebf2e136", + "use_trust_quorum": true, + "is_lrtq_learner": false, + "subnet": { + "net": "fd00:1122:3344:103::/64" + } + } + }, + "[fdb0:a840:2504:3d2::1]:12346": { + "generation": 0, + "schema_version": 1, + "body": { + "id": "c3a0f8be-5b05-4ee8-8c4e-2514de6501b6", + "rack_id": "ed6bcf59-9620-491d-8ebd-4a4eebf2e136", + "use_trust_quorum": true, + "is_lrtq_learner": false, + "subnet": { + "net": "fd00:1122:3344:101::/64" + } + } + } + }, + "config": { + "trust_quorum_peers": [ + { + "type": "gimlet", + "identifier": "BRM42220081", + "model": "913-0000019", + "revision": 6 + }, + { + "type": "gimlet", + "identifier": "BRM42220046", + "model": "913-0000019", + "revision": 6 + }, + { + "type": "gimlet", + "identifier": "BRM44220001", + "model": "913-0000019", + "revision": 6 + }, + { + "type": "gimlet", + "identifier": "BRM42220004", + "model": "913-0000019", + "revision": 6 + } + ], + "bootstrap_discovery": { + "type": "only_these", + "addrs": [ + "fdb0:a840:2504:157::1", + "fdb0:a840:2504:355::1", + "fdb0:a840:2504:396::1", + "fdb0:a840:2504:3d2::1" + ] + }, + "ntp_servers": [ + "ntp.eng.oxide.computer" + ], + "dns_servers": [ + "1.1.1.1", + "9.9.9.9" + ], + "internal_services_ip_pool_ranges": [ + { + "first": "172.20.28.1", + "last": "172.20.28.10" + } + ], + "external_dns_ips": [ + "172.20.28.1" + ], + "external_dns_zone_name": "madrid.eng.oxide.computer", + "external_certificates": [ + { + "cert": "", + "key": "" + } + ], + "recovery_silo": { + "silo_name": "recovery", + "user_name": "recovery", + "user_password_hash": "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY" + }, + "rack_network_config": { + "rack_subnet": "fd00:1122:3344:1::/56", + "infra_ip_first": "172.20.15.37", + "infra_ip_last": "172.20.15.38", + "ports": [ + { + "routes": [ + { + "destination": "0.0.0.0/0", + "nexthop": "172.20.15.33" + } + ], + "addresses": [ + "172.20.15.38/29" + ], + "switch": "switch0", + "port": 
"qsfp0", + "uplink_port_speed": "speed40_g", + "uplink_port_fec": "none", + "bgp_peers": [], + "autoneg": false + }, + { + "routes": [ + { + "destination": "0.0.0.0/0", + "nexthop": "172.20.15.33" + } + ], + "addresses": [ + "172.20.15.37/29" + ], + "switch": "switch1", + "port": "qsfp0", + "uplink_port_speed": "speed40_g", + "uplink_port_fec": "none", + "bgp_peers": [], + "autoneg": false + } + ], + "bgp": [] + } + } +} \ No newline at end of file diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index f7a93260e3..6c874d9a70 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -4,14 +4,6 @@ # Agent API. See the `RackInitializeRequest` type in bootstrap-agent or its # OpenAPI spec (in openapi/bootstrap-agent.json in the root of this workspace). -# The /56 subnet for this rack. This subnet is internal to the rack and fully -# managed by Omicron, so you can pick anything you want within the IPv6 Unique -# Local Address (ULA) range. The rack-specific /56 subnet also implies the -# parent /48 AZ subnet. -# |............| <- This /48 is the AZ Subnet -# |...............| <- This /56 is the Rack Subnet -rack_subnet = "fd00:1122:3344:0100::" - # Only include "our own sled" in the bootstrap network bootstrap_discovery.type = "only_ours" @@ -88,7 +80,14 @@ last = "192.168.1.29" # Configuration to bring up Boundary Services and make Nexus reachable from the # outside. See docs/how-to-run.adoc for more on what to put here. [rack_network_config] -rack_subnet = "fd00:1122:3344:01::/56" +# The /56 subnet for this rack. This subnet is internal to the rack and fully +# managed by Omicron, so you can pick anything you want within the IPv6 Unique +# Local Address (ULA) range. The rack-specific /56 subnet also implies the +# parent /48 AZ subnet. 
+# |............| <- This /48 is the AZ Subnet +# |...............| <- This /56 is the Rack Subnet +rack_subnet = "fd00:1122:3344:0100::/56" + # A range of IP addresses used by Boundary Services on the external network. In # a real system, these would be addresses of the uplink ports on the Sidecar. # With softnpu, only one address is used. diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 12cb2afd24..d0b4f94d9f 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -4,14 +4,6 @@ # Agent API. See the `RackInitializeRequest` type in bootstrap-agent or its # OpenAPI spec (in openapi/bootstrap-agent.json in the root of this workspace). -# The /56 subnet for this rack. This subnet is internal to the rack and fully -# managed by Omicron, so you can pick anything you want within the IPv6 Unique -# Local Address (ULA) range. The rack-specific /56 subnet also implies the -# parent /48 AZ subnet. -# |............| <- This /48 is the AZ Subnet -# |...............| <- This /56 is the Rack Subnet -rack_subnet = "fd00:1122:3344:0100::" - # Only include "our own sled" in the bootstrap network bootstrap_discovery.type = "only_ours" @@ -88,7 +80,14 @@ last = "192.168.1.29" # Configuration to bring up Boundary Services and make Nexus reachable from the # outside. See docs/how-to-run.adoc for more on what to put here. [rack_network_config] -rack_subnet = "fd00:1122:3344:01::/56" +# The /56 subnet for this rack. This subnet is internal to the rack and fully +# managed by Omicron, so you can pick anything you want within the IPv6 Unique +# Local Address (ULA) range. The rack-specific /56 subnet also implies the +# parent /48 AZ subnet. +# |............| <- This /48 is the AZ Subnet +# |...............| <- This /56 is the Rack Subnet +rack_subnet = "fd00:1122:3344:0100::/56" + # A range of IP addresses used by Boundary Services on the external network. 
In # a real system, these would be addresses of the uplink ports on the Sidecar. # With softnpu, only one address is used. diff --git a/wicket-common/src/rack_setup.rs b/wicket-common/src/rack_setup.rs index e3d5fad5fb..f28c0639a9 100644 --- a/wicket-common/src/rack_setup.rs +++ b/wicket-common/src/rack_setup.rs @@ -5,12 +5,24 @@ // Copyright 2023 Oxide Computer Company use omicron_common::address; -use omicron_common::api::internal::shared::RackNetworkConfig; +use omicron_common::api::internal::shared::BgpConfig; +use omicron_common::api::internal::shared::PortConfigV1; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use std::collections::BTreeSet; use std::net::IpAddr; +use std::net::Ipv4Addr; + +/// User-specified parts of +/// [`RackNetworkConfig`](omicron_common::api::internal::shared::RackNetworkConfig). +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct UserSpecifiedRackNetworkConfig { + pub infra_ip_first: Ipv4Addr, + pub infra_ip_last: Ipv4Addr, + pub ports: Vec, + pub bgp: Vec, +} // The portion of `CurrentRssUserConfig` that can be posted in one shot; it is // provided by the wicket user uploading a TOML file, currently. 
@@ -27,5 +39,5 @@ pub struct PutRssUserConfigInsensitive { pub internal_services_ip_pool_ranges: Vec, pub external_dns_ips: Vec, pub external_dns_zone_name: String, - pub rack_network_config: RackNetworkConfig, + pub rack_network_config: UserSpecifiedRackNetworkConfig, } diff --git a/wicket/src/cli/rack_setup/config_template.toml b/wicket/src/cli/rack_setup/config_template.toml index 2886fa01d7..d091237b5f 100644 --- a/wicket/src/cli/rack_setup/config_template.toml +++ b/wicket/src/cli/rack_setup/config_template.toml @@ -40,7 +40,6 @@ bootstrap_sleds = [] # TODO: docs on network config [rack_network_config] -rack_subnet = "" infra_ip_first = "" infra_ip_last = "" diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 5a8e8a560e..d050610c30 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -19,7 +19,7 @@ use wicket_common::rack_update::SpType; use wicketd_client::types::BootstrapSledDescription; use wicketd_client::types::CurrentRssUserConfigInsensitive; use wicketd_client::types::IpRange; -use wicketd_client::types::RackNetworkConfigV1; +use wicketd_client::types::UserSpecifiedRackNetworkConfig; static TEMPLATE: &str = include_str!("config_template.toml"); @@ -176,7 +176,7 @@ fn build_sleds_array(sleds: &[BootstrapSledDescription]) -> Array { fn populate_network_table( table: &mut Table, - config: Option<&RackNetworkConfigV1>, + config: Option<&UserSpecifiedRackNetworkConfig>, ) { // Helper function to serialize enums into their appropriate string // representations. 
@@ -195,7 +195,6 @@ fn populate_network_table( }; for (property, value) in [ - ("rack_subnet", config.rack_subnet.to_string()), ("infra_ip_first", config.infra_ip_first.to_string()), ("infra_ip_last", config.infra_ip_last.to_string()), ] { @@ -350,7 +349,6 @@ fn populate_network_table( #[cfg(test)] mod tests { use super::*; - use omicron_common::api::internal::shared::RackNetworkConfigV1 as InternalRackNetworkConfig; use std::net::Ipv6Addr; use wicket_common::rack_setup::PutRssUserConfigInsensitive; use wicket_common::rack_update::SpIdentifier; @@ -373,6 +371,7 @@ mod tests { use omicron_common::api::internal::shared::PortSpeed as InternalPortSpeed; use omicron_common::api::internal::shared::RouteConfig as InternalRouteConfig; use omicron_common::api::internal::shared::SwitchLocation as InternalSwitchLocation; + use wicket_common::rack_setup::UserSpecifiedRackNetworkConfig as InternalUserSpecifiedRackNetworkConfig; let rnc = value.rack_network_config.unwrap(); @@ -401,8 +400,7 @@ mod tests { .collect(), external_dns_ips: value.external_dns_ips, ntp_servers: value.ntp_servers, - rack_network_config: InternalRackNetworkConfig { - rack_subnet: rnc.rack_subnet, + rack_network_config: InternalUserSpecifiedRackNetworkConfig { infra_ip_first: rnc.infra_ip_first, infra_ip_last: rnc.infra_ip_last, ports: rnc @@ -514,8 +512,7 @@ mod tests { )], external_dns_ips: vec!["10.0.0.1".parse().unwrap()], ntp_servers: vec!["ntp1.com".into(), "ntp2.com".into()], - rack_network_config: Some(RackNetworkConfigV1 { - rack_subnet: "fd00:1122:3344:01::/56".parse().unwrap(), + rack_network_config: Some(UserSpecifiedRackNetworkConfig { infra_ip_first: "172.30.0.1".parse().unwrap(), infra_ip_last: "172.30.0.10".parse().unwrap(), ports: vec![PortConfigV1 { diff --git a/wicketd/src/http_entrypoints.rs b/wicketd/src/http_entrypoints.rs index 9c1740679f..9748a93bd5 100644 --- a/wicketd/src/http_entrypoints.rs +++ b/wicketd/src/http_entrypoints.rs @@ -32,7 +32,6 @@ use http::StatusCode; use 
internal_dns::resolver::Resolver; use omicron_common::address; use omicron_common::api::external::SemverVersion; -use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::update::ArtifactHashId; use omicron_common::update::ArtifactId; @@ -47,6 +46,7 @@ use std::net::IpAddr; use std::net::Ipv6Addr; use std::time::Duration; use wicket_common::rack_setup::PutRssUserConfigInsensitive; +use wicket_common::rack_setup::UserSpecifiedRackNetworkConfig; use wicket_common::update_events::EventReport; use wicket_common::WICKETD_TIMEOUT; @@ -172,7 +172,7 @@ pub struct CurrentRssUserConfigInsensitive { pub internal_services_ip_pool_ranges: Vec, pub external_dns_ips: Vec, pub external_dns_zone_name: String, - pub rack_network_config: Option, + pub rack_network_config: Option, } // This is a summary of the subset of `RackInitializeRequest` that is sensitive; @@ -1189,12 +1189,14 @@ async fn post_start_preflight_uplink_check( let (network_config, dns_servers, ntp_servers) = { let rss_config = rqctx.rss_config.lock().unwrap(); - let network_config = - rss_config.rack_network_config().cloned().ok_or_else(|| { + let network_config = rss_config + .user_specified_rack_network_config() + .cloned() + .ok_or_else(|| { HttpError::for_bad_request( None, "uplink preflight check requires setting \ - the uplink config for RSS" + the uplink config for RSS" .to_string(), ) })?; diff --git a/wicketd/src/preflight_check.rs b/wicketd/src/preflight_check.rs index 75cc5f6e09..4cd17604a0 100644 --- a/wicketd/src/preflight_check.rs +++ b/wicketd/src/preflight_check.rs @@ -2,7 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::api::internal::shared::SwitchLocation; use slog::o; use slog::Logger; @@ -12,6 +11,7 @@ use std::sync::Mutex; use tokio::sync::oneshot; use update_engine::events::EventReport; use update_engine::GenericSpec; +use wicket_common::rack_setup::UserSpecifiedRackNetworkConfig; mod uplink; @@ -44,7 +44,7 @@ impl PreflightCheckerHandler { pub(crate) async fn uplink_start( &self, - network_config: RackNetworkConfig, + network_config: UserSpecifiedRackNetworkConfig, dns_servers: Vec, ntp_servers: Vec, our_switch_location: SwitchLocation, @@ -94,7 +94,7 @@ pub(crate) struct PreflightCheckerBusy; #[derive(Debug)] enum PreflightCheck { Uplink { - network_config: RackNetworkConfig, + network_config: UserSpecifiedRackNetworkConfig, dns_servers: Vec, ntp_servers: Vec, our_switch_location: SwitchLocation, diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 47995f0c10..31d479a5ed 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -22,7 +22,6 @@ use omicron_common::address::DENDRITE_PORT; use omicron_common::api::internal::shared::PortConfigV1; use omicron_common::api::internal::shared::PortFec as OmicronPortFec; use omicron_common::api::internal::shared::PortSpeed as OmicronPortSpeed; -use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::OMICRON_DPD_TAG; use schemars::JsonSchema; @@ -49,6 +48,7 @@ use trust_dns_resolver::error::ResolveError; use trust_dns_resolver::error::ResolveErrorKind; use trust_dns_resolver::TokioAsyncResolver; use update_engine::StepSpec; +use wicket_common::rack_setup::UserSpecifiedRackNetworkConfig; const DNS_PORT: u16 = 53; @@ -68,7 +68,7 @@ const IPADM: &str = "/usr/sbin/ipadm"; const ROUTE: &str = "/usr/sbin/route"; pub(super) async fn run_local_uplink_preflight_check( - network_config: 
RackNetworkConfig, + network_config: UserSpecifiedRackNetworkConfig, dns_servers: Vec, ntp_servers: Vec, our_switch_location: SwitchLocation, diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index f654597d81..4bc1a6b62b 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -26,7 +26,6 @@ use gateway_client::types::SpType; use omicron_certificates::CertificateError; use omicron_common::address; use omicron_common::address::Ipv4Range; -use omicron_common::api::internal::shared::RackNetworkConfig; use sled_hardware::Baseboard; use slog::warn; use std::collections::BTreeSet; @@ -34,6 +33,7 @@ use std::mem; use std::net::IpAddr; use std::net::Ipv6Addr; use wicket_common::rack_setup::PutRssUserConfigInsensitive; +use wicket_common::rack_setup::UserSpecifiedRackNetworkConfig; // TODO-correctness For now, we always use the same rack subnet when running // RSS. When we get to multirack, this will be wrong, but there are many other @@ -64,7 +64,7 @@ pub(crate) struct CurrentRssConfig { external_dns_zone_name: String, external_certificates: Vec, recovery_silo_password_hash: Option, - rack_network_config: Option, + rack_network_config: Option, // External certificates are uploaded in two separate actions (cert then // key, or vice versa). 
Here we store a partial certificate; once we have @@ -82,7 +82,9 @@ impl CurrentRssConfig { &self.ntp_servers } - pub(crate) fn rack_network_config(&self) -> Option<&RackNetworkConfig> { + pub(crate) fn user_specified_rack_network_config( + &self, + ) -> Option<&UserSpecifiedRackNetworkConfig> { self.rack_network_config.as_ref() } @@ -252,7 +254,6 @@ impl CurrentRssConfig { .collect(); let request = RackInitializeRequest { - rack_subnet: RACK_SUBNET, trust_quorum_peers, bootstrap_discovery: BootstrapAddressDiscovery::OnlyThese( bootstrap_ips, @@ -268,7 +269,7 @@ impl CurrentRssConfig { user_name: UserId(RECOVERY_SILO_USERNAME.into()), user_password_hash, }, - rack_network_config: Some(rack_network_config), + rack_network_config, }; Ok(request) @@ -452,7 +453,7 @@ impl From<&'_ CurrentRssConfig> for CurrentRssUserConfig { } fn validate_rack_network_config( - config: &RackNetworkConfig, + config: &UserSpecifiedRackNetworkConfig, ) -> Result { use bootstrap_agent_client::types::BgpConfig as BaBgpConfig; use bootstrap_agent_client::types::BgpPeerConfig as BaBgpPeerConfig; @@ -497,7 +498,7 @@ fn validate_rack_network_config( // TODO Add more client side checks on `rack_network_config` contents? Ok(bootstrap_agent_client::types::RackNetworkConfigV1 { - rack_subnet: config.rack_subnet, + rack_subnet: RACK_SUBNET.into(), infra_ip_first: config.infra_ip_first, infra_ip_last: config.infra_ip_last, ports: config