From ea4da471c4abdb71edc7afca255e0c441cab5529 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Sat, 4 Nov 2023 09:38:48 -0700 Subject: [PATCH] Support for propolis-based softnpu device, fix multi-switch uplink updates. (#4390) --- common/src/lib.rs | 2 + illumos-utils/src/running_zone.rs | 8 +- illumos-utils/src/svc.rs | 32 ++++++- nexus/src/app/sagas/mod.rs | 2 +- .../app/sagas/switch_port_settings_apply.rs | 93 ++++++++++++------- .../app/sagas/switch_port_settings_clear.rs | 12 ++- nexus/tests/integration_tests/switch_port.rs | 4 + package-manifest.toml | 20 ++-- sled-agent/Cargo.toml | 1 + sled-agent/src/bootstrap/early_networking.rs | 25 +++-- sled-agent/src/bootstrap/pre_server.rs | 13 ++- sled-agent/src/config.rs | 3 +- sled-agent/src/instance.rs | 2 +- sled-agent/src/params.rs | 1 + sled-agent/src/services.rs | 82 ++++++++++++---- sled-hardware/src/illumos/mod.rs | 51 +++++----- sled-hardware/src/lib.rs | 7 +- smf/sled-agent/non-gimlet/config.toml | 2 +- tools/create_virtual_hardware.sh | 10 +- tools/dendrite_openapi_version | 4 +- tools/dendrite_stub_checksums | 6 +- tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- wicketd/src/preflight_check/uplink.rs | 11 ++- 25 files changed, 273 insertions(+), 128 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index ced10abb1a..1d2ed0afdb 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -75,3 +75,5 @@ impl slog::KV for FileKv { ) } } + +pub const OMICRON_DPD_TAG: &str = "omicron"; diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 805419cb5d..bdf7ed0cbf 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -517,12 +517,12 @@ impl RunningZone { // services are up, so future requests to create network addresses // or manipulate services will work. let fmri = "svc:/milestone/single-user:default"; - wait_for_service(Some(&zone.name), fmri).await.map_err(|_| { - BootError::Timeout { + wait_for_service(Some(&zone.name), fmri, zone.log.clone()) + .await + .map_err(|_| BootError::Timeout { service: fmri.to_string(), zone: zone.name.to_string(), - } - })?; + })?; // If the zone is self-assembling, then SMF service(s) inside the zone // will be creating the listen address for the zone's service(s), diff --git a/illumos-utils/src/svc.rs b/illumos-utils/src/svc.rs index b08679b720..a16795771c 100644 --- a/illumos-utils/src/svc.rs +++ b/illumos-utils/src/svc.rs @@ -12,6 +12,7 @@ use omicron_common::backoff; #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] mod inner { use super::*; + use slog::{warn, Logger}; // TODO(https://www.illumos.org/issues/13837): This is a hack; // remove me when when fixed. Ideally, the ".synchronous()" argument @@ -27,10 +28,19 @@ mod inner { pub async fn wait_for_service<'a, 'b>( zone: Option<&'a str>, fmri: &'b str, + log: Logger, ) -> Result<(), Error> { let name = smf::PropertyName::new("restarter", "state").unwrap(); - let log_notification_failure = |_error, _delay| {}; + let log_notification_failure = |error, delay| { + warn!( + log, + "wait for service {:?} failed: {}. retry in {:?}", + zone, + error, + delay + ); + }; backoff::retry_notify( backoff::retry_policy_local(), || async { @@ -47,6 +57,26 @@ mod inner { == &smf::PropertyValue::Astring("online".to_string()) { return Ok(()); + } else { + // This is helpful in virtual environments where + // services take a few tries to come up. To enable, + // compile with RUSTFLAGS="--cfg svcadm_autoclear" + #[cfg(svcadm_autoclear)] + if let Some(zname) = zone { + if let Err(out) = + tokio::process::Command::new(crate::PFEXEC) + .env_clear() + .arg("svcadm") + .arg("-z") + .arg(zname) + .arg("clear") + .arg("*") + .output() + .await + { + warn!(log, "clearing service maintenance failed: {out}"); + }; + } } } return Err(backoff::BackoffError::transient( diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 83e0e9b8b4..5b1843be3d 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -313,7 +313,7 @@ macro_rules! declare_saga_actions { }; } -pub(crate) const NEXUS_DPD_TAG: &str = "nexus"; +use omicron_common::OMICRON_DPD_TAG as NEXUS_DPD_TAG; pub(crate) use __action_name; pub(crate) use __emit_action; diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index fb06dc5fc0..830792826e 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{NexusActionContext, NEXUS_DPD_TAG}; +use crate::app::map_switch_zone_addrs; use crate::app::sagas::retry_until_known_result; use crate::app::sagas::{ declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError, @@ -15,15 +16,17 @@ use dpd_client::types::{ RouteSettingsV4, RouteSettingsV6, }; use dpd_client::{Ipv4Cidr, Ipv6Cidr}; +use internal_dns::ServiceName; use ipnetwork::IpNetwork; use mg_admin_client::types::Prefix4; -use mg_admin_client::types::{ApplyRequest, BgpPeerConfig, BgpRoute}; +use mg_admin_client::types::{ApplyRequest, BgpPeerConfig}; use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed, NETWORK_KEY}; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::UpdatePrecondition; use nexus_db_queries::{authn, db}; use nexus_types::external_api::params; -use omicron_common::api::external::{self, DataPageParams, NameOrId}; +use omicron_common::address::SLED_AGENT_PORT; +use omicron_common::api::external::{self, NameOrId}; use omicron_common::api::internal::shared::{ ParseSwitchLocationError, SwitchLocation, }; @@ -35,8 +38,8 @@ use sled_agent_client::types::{ BgpPeerConfig as OmicronBgpPeerConfig, HostPortConfig, }; use std::collections::HashMap; -use std::net::IpAddr; use std::net::SocketAddrV6; +use std::net::{IpAddr, Ipv6Addr}; use std::str::FromStr; use std::sync::Arc; use steno::ActionError; @@ -177,7 +180,6 @@ pub(crate) fn api_to_dpd_port_settings( settings: &SwitchPortSettingsCombinedResult, ) -> Result { let mut dpd_port_settings = PortSettings { - tag: NEXUS_DPD_TAG.into(), links: HashMap::new(), v4_routes: HashMap::new(), v6_routes: HashMap::new(), @@ -192,6 +194,7 @@ pub(crate) fn api_to_dpd_port_settings( LinkSettings { params: LinkCreate { autoneg: false, + lane: Some(LinkId(0)), kr: false, fec: match l.fec { SwitchLinkFec::Firecode => PortFec::Firecode, @@ -283,7 +286,13 @@ async fn spa_ensure_switch_port_settings( })?; retry_until_known_result(log, || async { - dpd_client.port_settings_apply(&port_id, &dpd_port_settings).await + dpd_client + .port_settings_apply( + &port_id, + Some(NEXUS_DPD_TAG), + &dpd_port_settings, + ) + .await }) .await .map_err(|e| match e { @@ -331,7 +340,9 @@ async fn spa_undo_ensure_switch_port_settings( Some(id) => id, None => { retry_until_known_result(log, || async { - dpd_client.port_settings_clear(&port_id).await + dpd_client + .port_settings_clear(&port_id, Some(NEXUS_DPD_TAG)) + .await }) .await .map_err(|e| external::Error::internal_error(&e.to_string()))?; @@ -355,7 +366,13 @@ async fn spa_undo_ensure_switch_port_settings( })?; retry_until_known_result(log, || async { - dpd_client.port_settings_apply(&port_id, &dpd_port_settings).await + dpd_client + .port_settings_apply( + &port_id, + Some(NEXUS_DPD_TAG), + &dpd_port_settings, + ) + .await }) .await .map_err(|e| external::Error::internal_error(&e.to_string()))?; @@ -418,22 +435,6 @@ pub(crate) async fn ensure_switch_port_bgp_settings( )) })?; - // TODO picking the first configured address by default, but this needs - // to be something that can be specified in the API. - let nexthop = match settings.addresses.get(0) { - Some(switch_port_addr) => Ok(switch_port_addr.address.ip()), - None => Err(ActionError::action_failed( - "at least one address required for bgp peering".to_string(), - )), - }?; - - let nexthop = match nexthop { - IpAddr::V4(nexthop) => Ok(nexthop), - IpAddr::V6(_) => Err(ActionError::action_failed( - "IPv6 nexthop not yet supported".to_string(), - )), - }?; - let mut prefixes = Vec::new(); for a in &announcements { let value = match a.network.ip() { @@ -455,7 +456,7 @@ pub(crate) async fn ensure_switch_port_bgp_settings( connect_retry: peer.connect_retry.0.into(), keepalive: peer.keepalive.0.into(), resolution: BGP_SESSION_RESOLUTION, - routes: vec![BgpRoute { nexthop, prefixes }], + originate: prefixes, }; bgp_peer_configs.push(bpc); @@ -809,7 +810,7 @@ pub(crate) async fn select_mg_client( } pub(crate) async fn get_scrimlet_address( - _location: SwitchLocation, + location: SwitchLocation, nexus: &Arc, ) -> Result { /* TODO this depends on DNS entries only coming from RSS, it's broken @@ -826,21 +827,41 @@ pub(crate) async fn get_scrimlet_address( )) }) */ - let opctx = &nexus.opctx_for_internal_api(); - Ok(nexus - .sled_list(opctx, &DataPageParams::max_page()) + let result = nexus + .resolver() + .await + .lookup_all_ipv6(ServiceName::Dendrite) .await .map_err(|e| { ActionError::action_failed(format!( - "get_scrimlet_address: failed to list sleds: {e}" + "scrimlet dns lookup failed {e}", )) - })? - .into_iter() - .find(|x| x.is_scrimlet()) - .ok_or(ActionError::action_failed( - "get_scrimlet_address: no scrimlets found".to_string(), - ))? - .address()) + }); + + let mappings = match result { + Ok(addrs) => map_switch_zone_addrs(&nexus.log, addrs).await, + Err(e) => { + warn!(nexus.log, "Failed to lookup Dendrite address: {e}"); + return Err(ActionError::action_failed(format!( + "switch mapping failed {e}", + ))); + } + }; + + let addr = match mappings.get(&location) { + Some(addr) => addr, + None => { + return Err(ActionError::action_failed(format!( + "address for switch at location: {location} not found", + ))); + } + }; + + let mut segments = addr.segments(); + segments[7] = 1; + let addr = Ipv6Addr::from(segments); + + Ok(SocketAddrV6::new(addr, SLED_AGENT_PORT, 0, 0)) } #[derive(Clone, Debug)] diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs index 14544b0f55..1ab2f6be0c 100644 --- a/nexus/src/app/sagas/switch_port_settings_clear.rs +++ b/nexus/src/app/sagas/switch_port_settings_clear.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::switch_port_settings_apply::select_dendrite_client; -use super::NexusActionContext; +use super::{NexusActionContext, NEXUS_DPD_TAG}; use crate::app::sagas::retry_until_known_result; use crate::app::sagas::switch_port_settings_apply::{ api_to_dpd_port_settings, apply_bootstore_update, bootstore_update, @@ -154,7 +154,7 @@ async fn spa_clear_switch_port_settings( let dpd_client = select_dendrite_client(&sagactx).await?; retry_until_known_result(log, || async { - dpd_client.port_settings_clear(&port_id).await + dpd_client.port_settings_clear(&port_id, Some(NEXUS_DPD_TAG)).await }) .await .map_err(|e| ActionError::action_failed(e.to_string()))?; @@ -197,7 +197,13 @@ async fn spa_undo_clear_switch_port_settings( .map_err(ActionError::action_failed)?; retry_until_known_result(log, || async { - dpd_client.port_settings_apply(&port_id, &dpd_port_settings).await + dpd_client + .port_settings_apply( + &port_id, + Some(NEXUS_DPD_TAG), + &dpd_port_settings, + ) + .await }) .await .map_err(|e| external::Error::internal_error(&e.to_string()))?; diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index fada45694d..ccd0b50fbe 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -24,6 +24,10 @@ use omicron_common::api::external::{ type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; +// TODO: unfortunately this test can no longer be run in the integration test +// suite because it depends on communicating with MGS which is not part +// of the infrastructure available in the integration test context. +#[ignore] #[nexus_test] async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let client = &ctx.external_client; diff --git a/package-manifest.toml b/package-manifest.toml index b8ffb2756a..c6949e4437 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -422,7 +422,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "d7169a61fd8833b3a1e6f46d897ca3295b2a28b6" +source.commit = "82aa17646265449ee0ede9410208e510fa4a5877" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt source.sha256 = "d871406ed926571efebdab248de08d4f1ca6c31d4f9a691ce47b186474165c57" @@ -438,7 +438,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "d7169a61fd8833b3a1e6f46d897ca3295b2a28b6" +source.commit = "82aa17646265449ee0ede9410208e510fa4a5877" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt source.sha256 = "85ec05a8726989b5cb0a567de6b0855f6f84b6f3409ac99ccaf372be5821e45d" @@ -453,10 +453,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "d7169a61fd8833b3a1e6f46d897ca3295b2a28b6" +source.commit = "82aa17646265449ee0ede9410208e510fa4a5877" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "452dfb3491e1b6d4df6be1cb689921f59623aed082e47606a78c0f44d918f66a" +source.sha256 = "1badd6adfece0a1b661f7efb9a2ca65e471f45cf9c8ecbd72b228ca174311e31" output.type = "zone" output.intermediate_only = true @@ -473,8 +473,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "343e3a572cc02efe3f8b68f9affd008623a33966" -source.sha256 = "0808f331741e02d55e199847579dfd01f3658b21c7122cef8c3f9279f43dbab0" +source.commit = "559fad2f379900a05ced410944353c1d19100390" +source.sha256 = "ce14c1f0481b13ce47a25386a3b1e49d9570f4c1c31cad3f13c14f75b130dafa" output.type = "zone" output.intermediate_only = true @@ -498,8 +498,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "343e3a572cc02efe3f8b68f9affd008623a33966" -source.sha256 = "c359de1be5073a484d86d4c58e8656a36002ce1dc38506f97b730e21615ccae1" +source.commit = "559fad2f379900a05ced410944353c1d19100390" +source.sha256 = "1a1246e2e596f36182eb6a5e402c272d0cd91aab351c5289cc4a29cb822c8888" output.type = "zone" output.intermediate_only = true @@ -516,8 +516,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "343e3a572cc02efe3f8b68f9affd008623a33966" -source.sha256 = "110bfbfb2cf3d3471f3e3a64d26129c7a02f6c5857f9623ebb99690728c3b2ff" +source.commit = "559fad2f379900a05ced410944353c1d19100390" +source.sha256 = "b9ff0f7f9e6193f4fa0aff77f7ec80f726f431ce1024a88021f207beb9079793" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index ff9644773a..3889be5eff 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -119,5 +119,6 @@ machine-non-gimlet = [] switch-asic = [] switch-stub = [] switch-softnpu = [] +switch-hypersoftnpu = [] rack-topology-single-sled = [] rack-topology-multi-sled = [] diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 9adfa47d9b..a8aa978f9d 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -27,6 +27,7 @@ use omicron_common::backoff::{ retry_notify, retry_policy_local, BackoffError, ExponentialBackoff, ExponentialBackoffBuilder, }; +use omicron_common::OMICRON_DPD_TAG; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use slog::Logger; @@ -403,7 +404,7 @@ impl<'a> EarlyNetworkSetup<'a> { let dpd = DpdClient::new( &format!("http://[{}]:{}", switch_zone_underlay_ip, DENDRITE_PORT), dpd_client::ClientState { - tag: "early_networking".to_string(), + tag: OMICRON_DPD_TAG.into(), log: self.log.new(o!("component" => "DpdClient")), }, ); @@ -432,13 +433,17 @@ impl<'a> EarlyNetworkSetup<'a> { "Configuring default uplink on switch"; "config" => #?dpd_port_settings ); - dpd.port_settings_apply(&port_id, &dpd_port_settings) - .await - .map_err(|e| { - EarlyNetworkSetupError::Dendrite(format!( - "unable to apply uplink port configuration: {e}" - )) - })?; + dpd.port_settings_apply( + &port_id, + Some(OMICRON_DPD_TAG), + &dpd_port_settings, + ) + .await + .map_err(|e| { + EarlyNetworkSetupError::Dendrite(format!( + "unable to apply uplink port configuration: {e}" + )) + })?; info!(self.log, "advertising boundary services loopback address"); @@ -462,10 +467,9 @@ impl<'a> EarlyNetworkSetup<'a> { "failed to parse `BOUNDARY_SERVICES_ADDR` as `Ipv6Addr`: {e}" )) })?, - tag: "rss".into(), + tag: OMICRON_DPD_TAG.into(), }; let mut dpd_port_settings = PortSettings { - tag: "rss".into(), links: HashMap::new(), v4_routes: HashMap::new(), v6_routes: HashMap::new(), @@ -488,6 +492,7 @@ impl<'a> EarlyNetworkSetup<'a> { kr: false, fec: convert_fec(&port_config.uplink_port_fec), speed: convert_speed(&port_config.uplink_port_speed), + lane: Some(LinkId(0)), }, //addrs: vec![addr], addrs, diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 0c19c30865..05493f5aa3 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -14,6 +14,7 @@ use super::maghemite; use super::secret_retriever::LrtqOrHardcodedSecretRetriever; use super::server::StartError; use crate::config::Config; +use crate::config::SidecarRevision; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; use crate::storage_manager::StorageManager; @@ -339,6 +340,7 @@ async fn cleanup_all_old_global_state(log: &Logger) -> Result<(), StartError> { } fn enable_mg_ddm(config: &Config, log: &Logger) -> Result<(), StartError> { + info!(log, "finding links {:?}", config.data_links); let mg_addr_objs = underlay::find_nics(&config.data_links) .map_err(StartError::FindMaghemiteAddrObjs)?; if mg_addr_objs.is_empty() { @@ -423,7 +425,16 @@ fn sled_mode_from_config(config: &Config) -> Result { } else if cfg!(feature = "switch-stub") { DendriteAsic::TofinoStub } else if cfg!(feature = "switch-softnpu") { - DendriteAsic::SoftNpu + match config.sidecar_revision { + SidecarRevision::SoftZone(_) => DendriteAsic::SoftNpuZone, + SidecarRevision::SoftPropolis(_) => { + DendriteAsic::SoftNpuPropolisDevice + } + _ => return Err(StartError::IncorrectBuildPackaging( + "sled-agent configured to run on softnpu zone but dosen't \ + have a softnpu sidecar revision", + )), + } } else { return Err(StartError::IncorrectBuildPackaging( "sled-agent configured to run on scrimlet but wasn't \ diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index 2473c14566..fad4b2e94b 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -28,7 +28,8 @@ pub enum SledMode { #[serde(rename_all = "snake_case")] pub enum SidecarRevision { Physical(String), - Soft(SoftPortConfig), + SoftZone(SoftPortConfig), + SoftPropolis(SoftPortConfig), } #[derive(Debug, Clone, Deserialize)] diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 94614c2363..5c61993293 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -984,7 +984,7 @@ impl Instance { // but it helps distinguish "online in SMF" from "responding to HTTP // requests". let fmri = fmri_name(); - wait_for_service(Some(&zname), &fmri) + wait_for_service(Some(&zname), &fmri, inner.log.clone()) .await .map_err(|_| Error::Timeout(fmri.to_string()))?; info!(inner.log, "Propolis SMF service is online"); diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 5fda3c1ae6..cd84c9acd4 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -347,6 +347,7 @@ pub enum ServiceType { #[serde(skip)] Tfport { pkt_source: String, + asic: DendriteAsic, }, #[serde(skip)] Uplink, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index a9be0e7c4a..d1d8dbfff0 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -676,6 +676,11 @@ impl ServiceManager { device: "tofino".to_string(), }); } + ServiceType::Dendrite { + asic: DendriteAsic::SoftNpuPropolisDevice, + } => { + devices.push("/dev/tty03".into()); + } _ => (), } } @@ -741,7 +746,7 @@ impl ServiceManager { for svc in &req.services { match &svc.details { - ServiceType::Tfport { pkt_source } => { + ServiceType::Tfport { pkt_source, asic: _ } => { // The tfport service requires a MAC device to/from which sidecar // packets may be multiplexed. If the link isn't present, don't // bother trying to start the zone. @@ -772,9 +777,13 @@ impl ServiceManager { } Err(_) => { - return Err(Error::MissingDevice { - device: link.to_string(), - }); + if let SidecarRevision::SoftZone(_) = + self.inner.sidecar_revision + { + return Err(Error::MissingDevice { + device: link.to_string(), + }); + } } } } @@ -1815,14 +1824,21 @@ impl ServiceManager { "config/port_config", "/opt/oxide/dendrite/misc/model_config.toml", )?, - DendriteAsic::SoftNpu => { - smfh.setprop("config/mgmt", "uds")?; - smfh.setprop( - "config/uds_path", - "/opt/softnpu/stuff", - )?; + asic @ (DendriteAsic::SoftNpuZone + | DendriteAsic::SoftNpuPropolisDevice) => { + if asic == &DendriteAsic::SoftNpuZone { + smfh.setprop("config/mgmt", "uds")?; + smfh.setprop( + "config/uds_path", + "/opt/softnpu/stuff", + )?; + } + if asic == &DendriteAsic::SoftNpuPropolisDevice { + smfh.setprop("config/mgmt", "uart")?; + } let s = match self.inner.sidecar_revision { - SidecarRevision::Soft(ref s) => s, + SidecarRevision::SoftZone(ref s) => s, + SidecarRevision::SoftPropolis(ref s) => s, _ => { return Err(Error::SidecarRevision( anyhow::anyhow!( @@ -1847,7 +1863,7 @@ impl ServiceManager { }; smfh.refresh()?; } - ServiceType::Tfport { pkt_source } => { + ServiceType::Tfport { pkt_source, asic } => { info!(self.inner.log, "Setting up tfport service"); let is_gimlet = is_gimlet().map_err(|e| { @@ -1882,6 +1898,12 @@ impl ServiceManager { } smfh.setprop("config/pkt_source", pkt_source)?; } + if asic == &DendriteAsic::SoftNpuZone { + smfh.setprop("config/flags", "--sync-only")?; + } + if asic == &DendriteAsic::SoftNpuPropolisDevice { + smfh.setprop("config/pkt_source", pkt_source)?; + } smfh.setprop( "config/host", &format!("[{}]", Ipv6Addr::LOCALHOST), @@ -2509,19 +2531,42 @@ impl ServiceManager { vec![ ServiceType::Dendrite { asic: DendriteAsic::TofinoAsic }, ServiceType::ManagementGatewayService, - ServiceType::Tfport { pkt_source: "tfpkt0".to_string() }, + ServiceType::Tfport { + pkt_source: "tfpkt0".to_string(), + asic: DendriteAsic::TofinoAsic, + }, + ServiceType::Uplink, + ServiceType::Wicketd { baseboard }, + ServiceType::Mgd, + ServiceType::MgDdm { mode: "transit".to_string() }, + ] + } + + SledMode::Scrimlet { + asic: asic @ DendriteAsic::SoftNpuPropolisDevice, + } => { + data_links = vec!["vioif0".to_owned()]; + vec![ + ServiceType::Dendrite { asic }, + ServiceType::ManagementGatewayService, ServiceType::Uplink, ServiceType::Wicketd { baseboard }, ServiceType::Mgd, ServiceType::MgDdm { mode: "transit".to_string() }, + ServiceType::Tfport { + pkt_source: "vioif0".to_string(), + asic, + }, + ServiceType::SpSim, ] } // Sled is a scrimlet but is not running the real tofino driver. SledMode::Scrimlet { - asic: asic @ (DendriteAsic::TofinoStub | DendriteAsic::SoftNpu), + asic: + asic @ (DendriteAsic::TofinoStub | DendriteAsic::SoftNpuZone), } => { - if let DendriteAsic::SoftNpu = asic { + if let DendriteAsic::SoftNpuZone = asic { let softnpu_filesystem = zone::Fs { ty: "lofs".to_string(), dir: "/opt/softnpu/stuff".to_string(), @@ -2538,7 +2583,10 @@ impl ServiceManager { ServiceType::Wicketd { baseboard }, ServiceType::Mgd, ServiceType::MgDdm { mode: "transit".to_string() }, - ServiceType::Tfport { pkt_source: "tfpkt0".to_string() }, + ServiceType::Tfport { + pkt_source: "tfpkt0".to_string(), + asic, + }, ServiceType::SpSim, ] } @@ -3070,7 +3118,7 @@ mod test { // Wait for the networking service. let wait_ctx = svc::wait_for_service_context(); - wait_ctx.expect().return_once(|_, _| Ok(())); + wait_ctx.expect().return_once(|_, _, _| Ok(())); // Import the manifest, enable the service let execute_ctx = illumos_utils::execute_context(); diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 0364c98f14..c0145b75e8 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -611,34 +611,37 @@ impl HardwareManager { // receiver will receive a tokio::sync::broadcast::error::RecvError::Lagged // error, indicating they should re-scan the hardware themselves. let (tx, _) = broadcast::channel(1024); - let hw = match sled_mode { - // Treat as a possible scrimlet and setup to scan for real Tofino device. - SledMode::Auto - | SledMode::Scrimlet { asic: DendriteAsic::TofinoAsic } => { - HardwareView::new() - } + let hw = + match sled_mode { + // Treat as a possible scrimlet and setup to scan for real Tofino device. + SledMode::Auto + | SledMode::Scrimlet { asic: DendriteAsic::TofinoAsic } => { + HardwareView::new() + } - // Treat sled as gimlet and ignore any attached Tofino device. - SledMode::Gimlet => HardwareView::new_stub_tofino( - // active= - false, - ), + // Treat sled as gimlet and ignore any attached Tofino device. + SledMode::Gimlet => HardwareView::new_stub_tofino( + // active= + false, + ), - // Treat as scrimlet and use the stub Tofino device. - SledMode::Scrimlet { asic: DendriteAsic::TofinoStub } => { - HardwareView::new_stub_tofino(true) - } + // Treat as scrimlet and use the stub Tofino device. + SledMode::Scrimlet { asic: DendriteAsic::TofinoStub } => { + HardwareView::new_stub_tofino(true) + } - // Treat as scrimlet (w/ SoftNPU) and use the stub Tofino device. - // TODO-correctness: - // I'm not sure whether or not we should be treating softnpu - // as a stub or treating it as a different HardwareView variant, - // so this might change. - SledMode::Scrimlet { asic: DendriteAsic::SoftNpu } => { - HardwareView::new_stub_tofino(true) + // Treat as scrimlet (w/ SoftNPU) and use the stub Tofino device. + // TODO-correctness: + // I'm not sure whether or not we should be treating softnpu + // as a stub or treating it as a different HardwareView variant, + // so this might change. + SledMode::Scrimlet { + asic: + DendriteAsic::SoftNpuZone + | DendriteAsic::SoftNpuPropolisDevice, + } => HardwareView::new_stub_tofino(true), } - } - .map_err(|e| e.to_string())?; + .map_err(|e| e.to_string())?; let inner = Arc::new(Mutex::new(hw)); // Force the device tree to be polled at least once before returning. diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index c81bcddbfb..654dfd59d9 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -44,7 +44,8 @@ pub enum HardwareUpdate { pub enum DendriteAsic { TofinoAsic, TofinoStub, - SoftNpu, + SoftNpuZone, + SoftNpuPropolisDevice, } impl std::fmt::Display for DendriteAsic { @@ -55,7 +56,9 @@ impl std::fmt::Display for DendriteAsic { match self { DendriteAsic::TofinoAsic => "tofino_asic", DendriteAsic::TofinoStub => "tofino_stub", - DendriteAsic::SoftNpu => "soft_npu", + DendriteAsic::SoftNpuZone => "soft_npu_zone", + DendriteAsic::SoftNpuPropolisDevice => + "soft_npu_propolis_device", } ) } diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml index b4cb7e6cff..684c0f8589 100644 --- a/smf/sled-agent/non-gimlet/config.toml +++ b/smf/sled-agent/non-gimlet/config.toml @@ -12,7 +12,7 @@ sled_mode = "scrimlet" # Identifies the revision of the sidecar that is attached, if one is attached. # TODO: This field should be removed once Gimlets have the ability to auto-detect # this information. -sidecar_revision.soft = { front_port_count = 1, rear_port_count = 1 } +sidecar_revision.soft_zone = { front_port_count = 1, rear_port_count = 1 } # Setting this to true causes sled-agent to always report that its time is # in-sync, rather than querying its NTP zone. diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index 908cb752e9..1db40208f7 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -16,6 +16,7 @@ set -x SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" OMICRON_TOP="$SOURCE_DIR/.." +SOFTNPU_MODE=${SOFTNPU_MODE:-zone}; . "$SOURCE_DIR/virtual_hardware.sh" @@ -83,6 +84,9 @@ in the SoftNPU zone later to add those entries." ensure_run_as_root ensure_zpools -ensure_simulated_links "$PHYSICAL_LINK" -warn_if_no_proxy_arp -ensure_softnpu_zone + +if [[ "$SOFTNPU_MODE" == "zone" ]]; then + ensure_simulated_links "$PHYSICAL_LINK" + warn_if_no_proxy_arp + ensure_softnpu_zone +fi diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index c91d1c2e98..55d2adfc46 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="343e3a572cc02efe3f8b68f9affd008623a33966" -SHA2="544ab42ccc7942d8ece9cdc80cd85d002bcf9d5646a291322bf2f79087ab6df0" +COMMIT="559fad2f379900a05ced410944353c1d19100390" +SHA2="82437c74afd4894aa5b9ea800d5777793e8777fe87471321dd22ad1a1c9c9ef3" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 8fa98114fb..456084adee 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="0808f331741e02d55e199847579dfd01f3658b21c7122cef8c3f9279f43dbab0" -CIDL_SHA256_LINUX_DPD="3e276dd553dd7cdb75c8ad023c2cd29b91485fafb94f27097a745b2b7ef5ecea" -CIDL_SHA256_LINUX_SWADM="645faf8a93bcae9814b2f116bccd66a54763332b56220e93b66316c853ce13d2" +CIDL_SHA256_ILLUMOS="ce14c1f0481b13ce47a25386a3b1e49d9570f4c1c31cad3f13c14f75b130dafa" +CIDL_SHA256_LINUX_DPD="d7da0aaed4e824a8e98b1a39e9ee41ad934ce38b0faa140ab4e7e2ca8c194e4e" +CIDL_SHA256_LINUX_SWADM="0449383a57468aec3b5a4ad26962cfc9e9a121bd13e777329e8a70767e6d9aae" diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 89c3e46164..3f6b566cda 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="d7169a61fd8833b3a1e6f46d897ca3295b2a28b6" +COMMIT="82aa17646265449ee0ede9410208e510fa4a5877" SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index a7e18285ae..3de723fca6 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="d7169a61fd8833b3a1e6f46d897ca3295b2a28b6" -SHA2="d0f7611e5ecd049b0f83bcfa843942401f155a0be36d9a2dfd73b8341d5f816e" +COMMIT="82aa17646265449ee0ede9410208e510fa4a5877" +SHA2="b3f55fe24e54530fdf96c22a033f9edc0bad9c0a5e3344763a23e52b251d5113" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index e65e1fc0a2..a4dd82aaca 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="452dfb3491e1b6d4df6be1cb689921f59623aed082e47606a78c0f44d918f66a" -MGD_LINUX_SHA256="d4c48eb6374c0cc7812b7af2c0ac92acdcbc91b7718a9ce64d069da00ae5ae73" +CIDL_SHA256="1badd6adfece0a1b661f7efb9a2ca65e471f45cf9c8ecbd72b228ca174311e31" +MGD_LINUX_SHA256="92463e3266f5a702af28504349526189aa0ebb23adb166ec2603182acf6cdb8c" diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 4d199d28b8..d94baf1995 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -24,6 +24,7 @@ use omicron_common::api::internal::shared::PortFec as OmicronPortFec; use omicron_common::api::internal::shared::PortSpeed as OmicronPortSpeed; use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::api::internal::shared::SwitchLocation; +use omicron_common::OMICRON_DPD_TAG; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -185,7 +186,11 @@ fn add_steps_for_single_local_uplink_preflight_check<'a>( // Create and configure the link. match dpd_client - .port_settings_apply(&port_id, &port_settings) + .port_settings_apply( + &port_id, + Some(OMICRON_DPD_TAG), + &port_settings, + ) .await { Ok(_response) => { @@ -714,8 +719,8 @@ fn add_steps_for_single_local_uplink_preflight_check<'a>( dpd_client .port_settings_apply( &port_id, + Some(OMICRON_DPD_TAG), &PortSettings { - tag: WICKETD_TAG.to_string(), links: HashMap::new(), v4_routes: HashMap::new(), v6_routes: HashMap::new(), @@ -758,7 +763,6 @@ fn build_port_settings( }; let mut port_settings = PortSettings { - tag: WICKETD_TAG.to_string(), links: HashMap::new(), v4_routes: HashMap::new(), v6_routes: HashMap::new(), @@ -777,6 +781,7 @@ fn build_port_settings( kr: false, fec, speed, + lane: Some(LinkId(0)), }, }, );