diff --git a/.cargo/config b/.cargo/config.toml similarity index 100% rename from .cargo/config rename to .cargo/config.toml diff --git a/.github/buildomat/jobs/a4x2-prepare.sh b/.github/buildomat/jobs/a4x2-prepare.sh index 1e603fc7d9..1438ec06de 100755 --- a/.github/buildomat/jobs/a4x2-prepare.sh +++ b/.github/buildomat/jobs/a4x2-prepare.sh @@ -3,7 +3,7 @@ #: name = "a4x2-prepare" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.77.2" +#: rust_toolchain = "1.78.0" #: output_rules = [ #: "=/out/cargo-bay-ce.tgz", #: "=/out/cargo-bay-cr1.tgz", diff --git a/.github/buildomat/jobs/build-and-test-helios.sh b/.github/buildomat/jobs/build-and-test-helios.sh index a4cbd978a9..b63d2e783f 100755 --- a/.github/buildomat/jobs/build-and-test-helios.sh +++ b/.github/buildomat/jobs/build-and-test-helios.sh @@ -3,7 +3,7 @@ #: name = "build-and-test (helios)" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.77.2" +#: rust_toolchain = "1.78.0" #: output_rules = [ #: "%/work/*", #: "%/var/tmp/omicron_tmp/*", diff --git a/.github/buildomat/jobs/build-and-test-linux.sh b/.github/buildomat/jobs/build-and-test-linux.sh index f10f07ff7a..4a1f86c3e1 100755 --- a/.github/buildomat/jobs/build-and-test-linux.sh +++ b/.github/buildomat/jobs/build-and-test-linux.sh @@ -3,7 +3,7 @@ #: name = "build-and-test (ubuntu-22.04)" #: variety = "basic" #: target = "ubuntu-22.04" -#: rust_toolchain = "1.77.2" +#: rust_toolchain = "1.78.0" #: output_rules = [ #: "%/work/*", #: "%/var/tmp/omicron_tmp/*", diff --git a/.github/buildomat/jobs/clippy.sh b/.github/buildomat/jobs/clippy.sh index a5007694ab..1f4c578e47 100755 --- a/.github/buildomat/jobs/clippy.sh +++ b/.github/buildomat/jobs/clippy.sh @@ -3,7 +3,7 @@ #: name = "clippy (helios)" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.77.2" +#: rust_toolchain = "1.78.0" #: output_rules = [] # Run clippy on illumos (not just other systems) because a bunch of our code diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index bee2700d89..a2aac86aec 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -50,6 +50,7 @@ _exit_trap() { dump-state pfexec /opt/oxide/opte/bin/opteadm list-ports pfexec /opt/oxide/opte/bin/opteadm dump-v2b + pfexec /opt/oxide/opte/bin/opteadm dump-v2p z_swadm link ls z_swadm addr list z_swadm route list diff --git a/.github/buildomat/jobs/omicron-common.sh b/.github/buildomat/jobs/omicron-common.sh index 2d5f51f432..345d99f405 100755 --- a/.github/buildomat/jobs/omicron-common.sh +++ b/.github/buildomat/jobs/omicron-common.sh @@ -3,9 +3,8 @@ #: name = "omicron-common (helios)" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.77.2" +#: rust_toolchain = "1.78.0" #: output_rules = [] -#: skip_clone = true # Verify that omicron-common builds successfully when used as a dependency # in an external project. 
It must not leak anything that requires an external @@ -18,10 +17,9 @@ set -o xtrace cargo --version rustc --version +cd /tmp cargo new --lib test-project cd test-project -cargo add omicron-common \ - --git https://github.com/oxidecomputer/omicron.git \ - --rev "$GITHUB_SHA" +cargo add omicron-common --path /work/oxidecomputer/omicron/common cargo check cargo build --release diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 63e5e1ce71..81ed41a961 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -3,7 +3,7 @@ #: name = "helios / package" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.77.2" +#: rust_toolchain = "1.78.0" #: output_rules = [ #: "=/work/package.tar.gz", #: ] diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index 2e3050b489..5b2d1bd405 100755 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -3,7 +3,7 @@ #: name = "helios / build TUF repo" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.77.2" +#: rust_toolchain = "1.78.0" #: output_rules = [ #: "=/work/manifest.toml", #: "=/work/repo.zip", diff --git a/certificates/src/lib.rs b/certificates/src/lib.rs index 442a9cfdd5..ee4ab4a6bd 100644 --- a/certificates/src/lib.rs +++ b/certificates/src/lib.rs @@ -412,7 +412,7 @@ mod tests { // Valid certs: either no key usage values, or valid ones. for ext_key_usage in &valid_ext_key_usage { let mut params = CertificateParams::new(vec![HOST.to_string()]); - params.extended_key_usages = ext_key_usage.clone(); + params.extended_key_usages.clone_from(ext_key_usage); assert!( validate_cert_with_params(params, &[HOST]).is_ok(), @@ -431,7 +431,7 @@ mod tests { for ext_key_usage in &invalid_ext_key_usage { let mut params = CertificateParams::new(vec![HOST.to_string()]); - params.extended_key_usages = ext_key_usage.clone(); + params.extended_key_usages.clone_from(ext_key_usage); assert!( matches!( diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index d4fb36004f..aaa59d0e98 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -35,8 +35,9 @@ progenitor::generate_api!( PortConfigV1 = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, RouteConfig = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, IpNet = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, - OmicronPhysicalDiskConfig = { derives = [Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord] }, RouterId = { derives = [PartialEq, Eq, Hash, Debug, Deserialize, Serialize] }, + VirtualNetworkInterfaceHost = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, + OmicronPhysicalDiskConfig = { derives = [Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord] }, }, //TODO trade the manual transformations later in this file for the // replace directives below? 
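Note on the derives added just above: giving the generated `VirtualNetworkInterfaceHost` type `Eq` and `Hash` is what lets the v2p reconciliation logic introduced later in this diff compare the desired mappings against what a sled reports using plain hash-set differences. A minimal sketch of that reconcile shape, using illustrative stand-in names (`Mapping`, `reconcile`) rather than the exact types from this change:

use std::collections::HashSet;
use std::net::{IpAddr, Ipv6Addr};

// Illustrative stand-in for the generated `VirtualNetworkInterfaceHost`,
// which now derives Eq + Hash so it can live in a HashSet.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct Mapping {
    virtual_ip: IpAddr,
    physical_host_ip: Ipv6Addr,
    vni: u32,
}

// Compute which mappings to add to and delete from a sled, given the
// desired set (from the database) and the set the sled currently reports.
fn reconcile(
    desired: &HashSet<Mapping>,
    found: &HashSet<Mapping>,
) -> (Vec<Mapping>, Vec<Mapping>) {
    let to_add = desired.difference(found).cloned().collect();
    let to_del = found.difference(desired).cloned().collect();
    (to_add, to_del)
}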
diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 5b029b0908..549f289ad0 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -1273,16 +1273,6 @@ async fn cmd_db_disk_physical( // SERVICES -#[derive(Tabled)] -#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] -struct ServiceInstanceRow { - #[tabled(rename = "SERVICE")] - kind: String, - instance_id: Uuid, - addr: String, - sled_serial: String, -} - // Snapshots fn format_snapshot(state: &SnapshotState) -> impl Display { match state { @@ -1438,15 +1428,6 @@ async fn cmd_db_snapshot_info( // SLEDS -#[derive(Tabled)] -#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] -struct ServiceInstanceSledRow { - #[tabled(rename = "SERVICE")] - kind: String, - instance_id: Uuid, - addr: String, -} - #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct SledRow { diff --git a/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs b/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs index 08ecaf3101..cd7628a840 100644 --- a/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs +++ b/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs @@ -101,16 +101,6 @@ trait Attributes: DynClone { fn y_axis_label(&self) -> String; fn axis_value(&self, val: f64) -> String; fn legend_value(&self, val: f64) -> String; - - fn increase(&mut self, _ndx: usize) -> Option { - None - } - - fn decrease(&mut self, _ndx: usize) -> Option { - None - } - - fn clear(&mut self) {} } dyn_clone::clone_trait_object!(Attributes); diff --git a/dev-tools/omdb/src/bin/omdb/mgs/sensors.rs b/dev-tools/omdb/src/bin/omdb/mgs/sensors.rs index d00bebd96c..f36e8633f9 100644 --- a/dev-tools/omdb/src/bin/omdb/mgs/sensors.rs +++ b/dev-tools/omdb/src/bin/omdb/mgs/sensors.rs @@ -480,12 +480,10 @@ fn sp_info_csv( } if let Some(sensor) = Sensor::from_string(&record[1], &record[2]) { - if sensors.get(&sensor).is_some() { + if !sensors.insert(sensor.clone()) { break; } - sensors.insert(sensor.clone()); - for (ndx, sp) in sps.iter().enumerate() { if let Some(sp) = sp { let value = match record[ndx + len].parse::() { diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index b36280980e..cefd07b161 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -114,6 +114,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + task: "vpc_route_manager" propagates updated VPC routes to all OPTE ports @@ -229,6 +233,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + task: "vpc_route_manager" propagates updated VPC routes to all OPTE ports @@ -331,6 +339,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + task: "vpc_route_manager" propagates updated VPC routes to all OPTE ports diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index b1476b5f37..c5ff9c9210 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -291,6 +291,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + task: "vpc_route_manager" propagates updated VPC routes to all OPTE ports @@ -475,6 +479,13 @@ task: "switch_port_config_manager" started at (s ago) and ran for ms 
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {}) +task: "v2p_manager" + configured period: every 30s + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {}) + task: "vpc_route_manager" configured period: every 30s currently executing: no diff --git a/dev-tools/oxlog/src/bin/oxlog.rs b/dev-tools/oxlog/src/bin/oxlog.rs index ceeb98b3bd..ed1c1a1fc8 100644 --- a/dev-tools/oxlog/src/bin/oxlog.rs +++ b/dev-tools/oxlog/src/bin/oxlog.rs @@ -47,7 +47,7 @@ struct FilterArgs { #[arg(short, long)] archived: bool, - // Print only the extra log files + /// Print only the extra log files #[arg(short, long)] extra: bool, diff --git a/illumos-utils/src/opte/params.rs b/illumos-utils/src/opte/params.rs index df1f33cb92..17c61d680f 100644 --- a/illumos-utils/src/opte/params.rs +++ b/illumos-utils/src/opte/params.rs @@ -31,26 +31,16 @@ pub struct VpcFirewallRule { } /// A mapping from a virtual NIC to a physical host -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -pub struct SetVirtualNetworkInterfaceHost { +#[derive( + Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct VirtualNetworkInterfaceHost { pub virtual_ip: IpAddr, pub virtual_mac: external::MacAddr, pub physical_host_ip: Ipv6Addr, pub vni: external::Vni, } -/// The data needed to identify a virtual IP for which a sled maintains an OPTE -/// virtual-to-physical mapping such that that mapping can be deleted. -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -pub struct DeleteVirtualNetworkInterfaceHost { - /// The virtual IP whose mapping should be deleted. - pub virtual_ip: IpAddr, - - /// The VNI for the network containing the virtual IP whose mapping should - /// be deleted. - pub vni: external::Vni, -} - /// DHCP configuration for a port /// /// Not present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 943e818832..769634742b 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -5,8 +5,7 @@ //! 
Manager for all OPTE ports on a Helios system use crate::opte::opte_firewall_rules; -use crate::opte::params::DeleteVirtualNetworkInterfaceHost; -use crate::opte::params::SetVirtualNetworkInterfaceHost; +use crate::opte::params::VirtualNetworkInterfaceHost; use crate::opte::params::VpcFirewallRule; use crate::opte::port::PortData; use crate::opte::Error; @@ -727,10 +726,62 @@ impl PortManager { Ok(()) } + #[cfg(target_os = "illumos")] + pub fn list_virtual_nics( + &self, + ) -> Result, Error> { + use macaddr::MacAddr6; + use opte_ioctl::OpteHdl; + + let hdl = OpteHdl::open(OpteHdl::XDE_CTL)?; + let v2p = + hdl.dump_v2p(&oxide_vpc::api::DumpVirt2PhysReq { unused: 99 })?; + let mut mappings: Vec<_> = vec![]; + + for mapping in v2p.mappings { + let vni = mapping + .vni + .as_u32() + .try_into() + .expect("opte VNI should be 24 bits"); + + for entry in mapping.ip4 { + mappings.push(VirtualNetworkInterfaceHost { + virtual_ip: IpAddr::V4(entry.0.into()), + virtual_mac: MacAddr6::from(entry.1.ether.bytes()).into(), + physical_host_ip: entry.1.ip.into(), + vni, + }); + } + + for entry in mapping.ip6 { + mappings.push(VirtualNetworkInterfaceHost { + virtual_ip: IpAddr::V6(entry.0.into()), + virtual_mac: MacAddr6::from(entry.1.ether.bytes()).into(), + physical_host_ip: entry.1.ip.into(), + vni, + }); + } + } + + Ok(mappings) + } + + #[cfg(not(target_os = "illumos"))] + pub fn list_virtual_nics( + &self, + ) -> Result, Error> { + info!( + self.inner.log, + "Listing virtual nics (ignored)"; + ); + Ok(vec![]) + } + #[cfg(target_os = "illumos")] pub fn set_virtual_nic_host( &self, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { use opte_ioctl::OpteHdl; @@ -757,7 +808,7 @@ impl PortManager { #[cfg(not(target_os = "illumos"))] pub fn set_virtual_nic_host( &self, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { info!( self.inner.log, @@ -770,20 +821,41 @@ impl PortManager { #[cfg(target_os = "illumos")] pub fn unset_virtual_nic_host( &self, - _mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { - // TODO requires https://github.com/oxidecomputer/opte/issues/332 + use opte_ioctl::OpteHdl; + + info!( + self.inner.log, + "Clearing mapping of virtual NIC to physical host"; + "mapping" => ?&mapping, + ); + + let hdl = OpteHdl::open(OpteHdl::XDE_CTL)?; + hdl.clear_v2p(&oxide_vpc::api::ClearVirt2PhysReq { + vip: mapping.virtual_ip.into(), + phys: oxide_vpc::api::PhysNet { + ether: oxide_vpc::api::MacAddr::from( + (*mapping.virtual_mac).into_array(), + ), + ip: mapping.physical_host_ip.into(), + vni: Vni::new(mapping.vni).unwrap(), + }, + })?; - slog::warn!(self.inner.log, "unset_virtual_nic_host unimplmented"); Ok(()) } #[cfg(not(target_os = "illumos"))] pub fn unset_virtual_nic_host( &self, - _mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { - info!(self.inner.log, "Ignoring unset of virtual NIC mapping"); + info!( + self.inner.log, + "Ignoring unset of virtual NIC mapping"; + "mapping" => ?&mapping, + ); Ok(()) } } diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 01f642a36b..08517026ef 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -379,6 +379,8 @@ pub struct BackgroundTaskConfig { pub instance_watcher: InstanceWatcherConfig, /// configuration for service VPC firewall propagation task pub 
service_firewall_propagation: ServiceFirewallPropagationConfig, + /// configuration for v2p mapping propagation task + pub v2p_mapping_propagation: V2PMappingPropagationConfig, } #[serde_as] @@ -539,6 +541,14 @@ pub struct ServiceFirewallPropagationConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct V2PMappingPropagationConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -777,6 +787,7 @@ mod test { region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 + v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -911,7 +922,10 @@ mod test { service_firewall_propagation: ServiceFirewallPropagationConfig { period_secs: Duration::from_secs(300), - } + }, + v2p_mapping_propagation: V2PMappingPropagationConfig { + period_secs: Duration::from_secs(30) + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -980,6 +994,7 @@ mod test { region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 + v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index c7b495b094..205885cfd8 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -55,6 +55,7 @@ mod project; mod semver_version; mod switch_interface; mod switch_port; +mod v2p_mapping; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. @@ -188,6 +189,7 @@ pub use typed_uuid::to_db_typed_uuid; pub use upstairs_repair::*; pub use user_builtin::*; pub use utilization::*; +pub use v2p_mapping::*; pub use virtual_provisioning_collection::*; pub use virtual_provisioning_resource::*; pub use vmm::*; diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index 05383cd056..c2258dba6c 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -92,7 +92,7 @@ impl OmicronZone { let (first_port, last_port) = snat_cfg.port_range_raw(); ntp_ntp_servers = Some(ntp_servers.clone()); ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); + ntp_ntp_domain.clone_from(domain); snat_ip = Some(IpNetwork::from(snat_cfg.ip)); snat_first_port = Some(SqlU16::from(first_port)); snat_last_port = Some(SqlU16::from(last_port)); @@ -162,7 +162,7 @@ impl OmicronZone { } => { ntp_ntp_servers = Some(ntp_servers.clone()); ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); + ntp_ntp_domain.clone_from(domain); (ZoneType::InternalNtp, address, None) } OmicronZoneType::Nexus { diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index bab18d9fae..a96bca8ab9 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -285,6 +285,17 @@ table! { } } +table! { + v2p_mapping_view (nic_id) { + nic_id -> Uuid, + sled_id -> Uuid, + sled_ip -> Inet, + vni -> Int4, + mac -> Int8, + ip -> Inet, + } +} + table! 
{ bgp_announce_set (id) { id -> Uuid, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 47a5689d07..e66c89f86f 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -29,7 +29,8 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), - KnownVersion::new(64, "vpc-subnet-routing"), + KnownVersion::new(65, "vpc-subnet-routing"), + KnownVersion::new(64, "add-view-for-v2p-mappings"), KnownVersion::new(63, "remove-producer-base-route-column"), KnownVersion::new(62, "allocate-subnet-decommissioned-sleds"), KnownVersion::new(61, "blueprint-add-sled-state"), diff --git a/nexus/db-model/src/v2p_mapping.rs b/nexus/db-model/src/v2p_mapping.rs new file mode 100644 index 0000000000..43831f7503 --- /dev/null +++ b/nexus/db-model/src/v2p_mapping.rs @@ -0,0 +1,16 @@ +use crate::schema::v2p_mapping_view; +use crate::{MacAddr, Vni}; +use ipnetwork::IpNetwork; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Queryable, Selectable, Clone, Debug, Serialize, Deserialize)] +#[diesel(table_name = v2p_mapping_view)] +pub struct V2PMappingView { + pub nic_id: Uuid, + pub sled_id: Uuid, + pub sled_ip: IpNetwork, + pub vni: Vni, + pub mac: MacAddr, + pub ip: IpNetwork, +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 9f2d2d02db..1618395800 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -94,6 +94,7 @@ mod switch_port; pub(crate) mod test_utils; mod update; mod utilization; +mod v2p_mapping; mod virtual_provisioning_collection; mod vmm; mod volume; @@ -983,8 +984,8 @@ mod test { // This is a little goofy, but it catches a bug that has // happened before. The returned columns share names (like // "id"), so we need to process them in-order. - assert!(regions.get(&dataset.id()).is_none()); - assert!(disk_datasets.get(®ion.id()).is_none()); + assert!(!regions.contains(&dataset.id())); + assert!(!disk_datasets.contains(®ion.id())); // Dataset must not be eligible for provisioning. if let Some(kind) = diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 2ac1f531a3..6baa6f643f 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -830,6 +830,62 @@ impl DataStore { public_error_from_diesel(e, ErrorHandler::Server) }) } + + /// List all network interfaces associated with all instances, making as + /// many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. 
+ /// + /// This particular method was added for propagating v2p mappings via RPWs + pub async fn instance_network_interfaces_all_list_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; + + let mut all_interfaces = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .instance_network_interfaces_all_list( + opctx, + &p.current_pagparams(), + ) + .await?; + paginator = p + .found_batch(&batch, &|nic: &InstanceNetworkInterface| { + nic.id() + }); + all_interfaces.extend(batch); + } + Ok(all_interfaces) + } + + /// List one page of all network interfaces associated with instances + pub async fn instance_network_interfaces_all_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::instance_network_interface::dsl; + + // See the comment in `service_create_network_interface`. There's no + // obvious parent for a service network interface (as opposed to + // instance network interfaces, which require ListChildren on the + // instance to list). As a logical proxy, we check for listing children + // of the service IP pool. + let (authz_pool, _pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz_pool).await?; + + paginated(dsl::instance_network_interface, dsl::id, pagparams) + .filter(dsl::time_deleted.is_null()) + .select(InstanceNetworkInterface::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] diff --git a/nexus/db-queries/src/db/datastore/v2p_mapping.rs b/nexus/db-queries/src/db/datastore/v2p_mapping.rs new file mode 100644 index 0000000000..6c00957e7d --- /dev/null +++ b/nexus/db-queries/src/db/datastore/v2p_mapping.rs @@ -0,0 +1,45 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::DataStore; +use crate::context::OpContext; +use crate::db; +use crate::db::datastore::SQL_BATCH_SIZE; +use crate::db::error::{public_error_from_diesel, ErrorHandler}; +use crate::db::model::V2PMappingView; +use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::{QueryDsl, SelectableHelper}; +use omicron_common::api::external::ListResultVec; + +impl DataStore { + pub async fn v2p_mappings( + &self, + opctx: &OpContext, + ) -> ListResultVec { + use db::schema::v2p_mapping_view::dsl; + + opctx.check_complex_operations_allowed()?; + + let mut mappings = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::v2p_mapping_view, + dsl::nic_id, + &p.current_pagparams(), + ) + .select(V2PMappingView::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + paginator = p.found_batch(&batch, &|mapping| mapping.nic_id); + mappings.extend(batch); + } + + Ok(mappings) + } +} diff --git a/nexus/db-queries/src/db/explain.rs b/nexus/db-queries/src/db/explain.rs index 3de5b4f280..24fd993040 100644 --- a/nexus/db-queries/src/db/explain.rs +++ b/nexus/db-queries/src/db/explain.rs @@ -4,6 +4,11 @@ //! Utility allowing Diesel to EXPLAIN queries. 
+// These utilities can be useful during development, so we don't want to +// `#[cfg(test)]` the module, but it's likely they won't be used outside of +// tests. +#![cfg_attr(not(test), allow(dead_code))] + use super::pool::DbConnection; use async_bb8_diesel::AsyncRunQueryDsl; use async_trait::async_trait; @@ -17,33 +22,6 @@ use diesel::result::Error as DieselError; /// Q: The Query we're explaining. /// /// EXPLAIN: -pub trait Explainable { - /// Syncronously issues an explain statement. - fn explain( - self, - conn: &mut DbConnection, - ) -> Result; -} - -impl Explainable for Q -where - Q: QueryFragment - + QueryId - + RunQueryDsl - + Sized - + 'static, -{ - fn explain( - self, - conn: &mut DbConnection, - ) -> Result { - Ok(ExplainStatement { query: self } - .get_results::(conn)? - .join("\n")) - } -} - -/// An async variant of [`Explainable`]. #[async_trait] pub trait ExplainableAsync { /// Asynchronously issues an explain statement. @@ -185,7 +163,8 @@ mod test { logctx.cleanup_successful(); } - // Tests that ".explain()" can tell us when we're doing full table scans. + // Tests that ".explain_async()" can tell us when we're doing full table + // scans. #[tokio::test] async fn test_explain_full_table_scan() { let logctx = dev::test_setup_log("test_explain_full_table_scan"); diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs index 5c803e20ac..c7215417c5 100644 --- a/nexus/db-queries/src/db/raw_query_builder.rs +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -69,18 +69,6 @@ enum TrustedStrVariants { ValidatedExplicitly(String), } -trait SqlQueryBinds { - fn add_bind(self, bind_counter: &BindParamCounter) -> Self; -} - -impl<'a, Query> SqlQueryBinds - for diesel::query_builder::BoxedSqlQuery<'a, Pg, Query> -{ - fn add_bind(self, bind_counter: &BindParamCounter) -> Self { - self.sql("$").sql(bind_counter.next().to_string()) - } -} - type BoxedQuery = diesel::query_builder::BoxedSqlQuery< 'static, Pg, diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index d3faf2459c..cba2edb7e6 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -116,6 +116,7 @@ region_replacement.period_secs = 30 # How frequently to query the status of active instances. instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/reconfigurator/planning/proptest-regressions/planner/omicron_zone_placement.txt b/nexus/reconfigurator/planning/proptest-regressions/planner/omicron_zone_placement.txt new file mode 100644 index 0000000000..bb2ad481bc --- /dev/null +++ b/nexus/reconfigurator/planning/proptest-regressions/planner/omicron_zone_placement.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 72b902d1405681df2dd46efc097da6840ff1234dc9d0d7c0ecf07bed0b0e7d8d # shrinks to input = _TestPlaceOmicronZonesArgs { input: ArbitraryTestInput { existing_sleds: {[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]: ExistingSled { zones: ZonesToPlace { zones: [] }, waiting_for_ntp: false, num_disks: 1 }}, zones_to_place: ZonesToPlace { zones: [Nexus] } } } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index aca5f057d8..3708d212ec 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -10,6 +10,7 @@ use crate::blueprint_builder::BlueprintBuilder; use crate::blueprint_builder::Ensure; use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; +use crate::planner::omicron_zone_placement::PlacementError; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::PlanningInput; @@ -25,6 +26,12 @@ use std::collections::BTreeMap; use std::collections::BTreeSet; use std::hash::Hash; +use self::omicron_zone_placement::DiscretionaryOmicronZone; +use self::omicron_zone_placement::OmicronZonePlacement; +use self::omicron_zone_placement::OmicronZonePlacementSledState; + +mod omicron_zone_placement; + pub struct Planner<'a> { log: Logger, input: &'a PlanningInput, @@ -214,7 +221,7 @@ impl<'a> Planner<'a> { // We will not mark sleds getting Crucible zones as ineligible; other // control plane service zones starting concurrently with Crucible zones // is fine. - let mut sleds_waiting_for_ntp_zones = BTreeSet::new(); + let mut sleds_waiting_for_ntp_zone = BTreeSet::new(); for (sled_id, sled_resources) in self.input.all_sled_resources(SledFilter::InService) @@ -252,7 +259,7 @@ impl<'a> Planner<'a> { // Don't make any other changes to this sled. However, this // change is compatible with any other changes to other sleds, // so we can "continue" here rather than "break". - sleds_waiting_for_ntp_zones.insert(sled_id); + sleds_waiting_for_ntp_zone.insert(sled_id); continue; } @@ -321,9 +328,7 @@ impl<'a> Planner<'a> { } } - self.ensure_correct_number_of_nexus_zones( - &sleds_waiting_for_ntp_zones, - )?; + self.ensure_correct_number_of_nexus_zones(&sleds_waiting_for_ntp_zone)?; Ok(()) } @@ -344,7 +349,7 @@ impl<'a> Planner<'a> { // TODO-correctness What should we do if we have _too many_ Nexus // instances? For now, just log it the number of zones any time we have // at least the minimum number. - let nexus_to_add = self + let mut nexus_to_add = self .input .target_nexus_zone_count() .saturating_sub(num_total_nexus); @@ -357,70 +362,69 @@ impl<'a> Planner<'a> { return Ok(()); } - // Now bin all the sleds which are eligible choices for a new Nexus zone - // by their current Nexus zone count. Skip sleds with a policy/state - // that should be eligible for Nexus but that don't yet have an NTP - // zone. - let mut sleds_by_num_nexus: BTreeMap> = - BTreeMap::new(); - for sled_id in self - .input - .all_sled_ids(SledFilter::Discretionary) - .filter(|sled_id| !sleds_waiting_for_ntp_zone.contains(sled_id)) - { - let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id); - sleds_by_num_nexus.entry(num_nexus).or_default().push(sled_id); - } - - // Ensure we have at least one sled on which we can add Nexus zones. If - // we don't, we have nothing else to do. 
This isn't a hard error, - // because we might be waiting for NTP on all eligible sleds (although - // it would be weird, since we're presumably running from within Nexus - // on some sled). - if sleds_by_num_nexus.is_empty() { - warn!(self.log, "want to add Nexus zones, but no eligible sleds"); - return Ok(()); - } + let mut zone_placement = OmicronZonePlacement::new( + self.input + .all_sled_resources(SledFilter::Discretionary) + .filter(|(sled_id, _)| { + !sleds_waiting_for_ntp_zone.contains(&sled_id) + }) + .map(|(sled_id, sled_resources)| { + OmicronZonePlacementSledState { + sled_id, + num_zpools: sled_resources + .all_zpools(ZpoolFilter::InService) + .count(), + discretionary_zones: self + .blueprint + .current_sled_zones(sled_id) + .filter_map(|zone| { + DiscretionaryOmicronZone::from_zone_type( + &zone.zone_type, + ) + }) + .collect(), + } + }), + ); - // Build a map of sled -> new nexus zone count. + // Build a map of sled -> new nexus zones to add. let mut sleds_to_change: BTreeMap = BTreeMap::new(); - 'outer: for _ in 0..nexus_to_add { - // `sleds_by_num_nexus` is sorted by key already, and we want to - // pick from the lowest-numbered bin. We can just loop over its - // keys, expecting to stop on the first iteration, with the only - // exception being when we've removed all the sleds from a bin. - for (&num_nexus, sleds) in sleds_by_num_nexus.iter_mut() { - // `sleds` contains all sleds with the minimum number of Nexus - // zones. Pick one arbitrarily but deterministically. - let Some(sled_id) = sleds.pop() else { - // We already drained this bin; move on. - continue; - }; - - // This insert might overwrite an old value for this sled (e.g., - // in the "we have 1 sled and need to add many Nexus instances - // to it" case). That's fine. - sleds_to_change.insert(sled_id, num_nexus + 1); + for i in 0..nexus_to_add { + match zone_placement.place_zone(DiscretionaryOmicronZone::Nexus) { + Ok(sled_id) => { + *sleds_to_change.entry(sled_id).or_default() += 1; + } + Err(PlacementError::NoSledsEligible { .. }) => { + // We won't treat this as a hard error; it's possible + // (albeit unlikely?) we're in a weird state where we need + // more sleds or disks to come online, and we may need to be + // able to produce blueprints to achieve that status. + warn!( + self.log, + "failed to place all new desired Nexus instances"; + "placed" => i, + "wanted_to_place" => nexus_to_add, + ); - // Put this sled back in our map, but now with one more Nexus. - sleds_by_num_nexus - .entry(num_nexus + 1) - .or_default() - .push(sled_id); + // Adjust `nexus_to_add` downward so it's consistent with + // the number of Nexuses we're actually adding. + nexus_to_add = i; - continue 'outer; + break; + } } - - // This should be unreachable: it's only possible if we fail to find - // a nonempty vec in `sleds_by_num_nexus`, and we checked above that - // `sleds_by_num_nexus` is not empty. - unreachable!("logic error finding sleds for Nexus"); } // For each sled we need to change, actually do so. let mut total_added = 0; - for (sled_id, new_nexus_count) in sleds_to_change { + for (sled_id, additional_nexus_count) in sleds_to_change { + // TODO-cleanup This is awkward: the builder wants to know how many + // total Nexus zones go on a given sled, but we have a count of how + // many we want to add. Construct a new target count. Maybe the + // builder should provide a different interface here? 
+ let new_nexus_count = self.blueprint.sled_num_nexus_zones(sled_id) + + additional_nexus_count; match self .blueprint .sled_ensure_zone_multiple_nexus(sled_id, new_nexus_count)? diff --git a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs new file mode 100644 index 0000000000..26e72db434 --- /dev/null +++ b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs @@ -0,0 +1,493 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Omicron zone placement decisions + +use nexus_types::deployment::BlueprintZoneType; +use omicron_uuid_kinds::SledUuid; +use sled_agent_client::ZoneKind; +use std::cmp::Ordering; +use std::collections::BinaryHeap; +use std::mem; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(test, derive(test_strategy::Arbitrary))] +pub(super) enum DiscretionaryOmicronZone { + Nexus, + // TODO expand this enum as we start to place more services +} + +impl DiscretionaryOmicronZone { + pub(super) fn from_zone_type( + zone_type: &BlueprintZoneType, + ) -> Option { + match zone_type { + BlueprintZoneType::Nexus(_) => Some(Self::Nexus), + // Zones that we should place but don't yet. + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::ExternalDns(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::Oximeter(_) => None, + // Zones that get special handling for placement (all sleds get + // them, although internal NTP has some interactions with boundary + // NTP that we don't yet handle, so this may change). + BlueprintZoneType::Crucible(_) + | BlueprintZoneType::InternalNtp(_) => None, + } + } +} + +impl From for ZoneKind { + fn from(zone: DiscretionaryOmicronZone) -> Self { + match zone { + DiscretionaryOmicronZone::Nexus => Self::Nexus, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub(super) enum PlacementError { + #[error( + "no sleds eligible for placement of new {} zone", + ZoneKind::from(*zone_kind) + )] + NoSledsEligible { zone_kind: DiscretionaryOmicronZone }, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(super) struct OmicronZonePlacementSledState { + pub sled_id: SledUuid, + pub num_zpools: usize, + pub discretionary_zones: Vec, +} + +/// `OmicronZonePlacement` keeps an internal heap of sleds and their current +/// discretionary zones and chooses sleds for placement of additional +/// discretionary zones. +#[derive(Debug, Clone)] +pub(super) struct OmicronZonePlacement { + sleds: OrderedSleds, +} + +impl OmicronZonePlacement { + /// Construct a new `OmicronZonePlacement` with a given set of eligible + /// sleds. + /// + /// Sleds which are not eligible for discretionary services for reasons + /// outside the knowledge of `OmicronZonePlacement` (e.g., sleds with a + /// policy or state that makes them ineligible) should be omitted from this + /// list of sleds. For now, sleds that are waiting for an NTP zone should be + /// omitted as well, although that may change in the future when we add + /// support for boundary NTP zone placement. + pub(super) fn new( + sleds: impl Iterator, + ) -> Self { + // We rebuild our heap whenever the zone type we're placing changes. 
We + // need to pick _something_ to start; this only matters for performance, + // not correctness (we don't have to rebuild the heap if `place_zone` is + // called with a zone kind that matches the current sorting). + let ordered_by = DiscretionaryOmicronZone::Nexus; + Self { sleds: OrderedSleds::new(ordered_by, sleds) } + } + + /// Attempt to place a new zone of kind `zone_kind` on one of the sleds + /// provided when this `OmicronZonePlacement` was created. + /// + /// On success, the internal heap held by `self` is updated assuming that a + /// new zone of the given kind was added to the sled returned by + /// `place_zone()`. This allows one `OmicronZonePlacement` to be reused + /// across multiple zone placement decisions, but requires the caller to + /// accept its decisions. If the caller decides not to add a zone to the + /// returned sled, the `OmicronZonePlacement` instance should be discarded + /// and a new one should be created for future placement decisions. + /// + /// Placement is currently minimal. The only hard requirement we enforce is + /// that a sled may only one run one instance of any given zone kind per + /// zpool it has (e.g., a sled with 5 zpools could run 5 Nexus instances and + /// 5 CockroachDb instances concurrently, but could not run 6 Nexus + /// instances). If there is at least one sled that satisfies this + /// requirement, this method will return `Ok(_)`. If there are multiple + /// sleds that satisfy this requirement, this method will return a sled + /// which has the fewest instances of `zone_kind`; if multiple sleds are + /// tied, it will pick the one with the fewest total discretionary zones; if + /// multiple sleds are still tied, it will pick deterministically (e.g., + /// choosing the lowest or highest sled ID). + /// + /// `OmicronZonePlacement` currently does not track _which_ zpools are + /// assigned to services. This could lead to it being overly conservative if + /// zpools that are not in service are hosting relevant zones. For example, + /// imagine a sled with two zpools: zpool-a and zpool-b. The sled has a + /// single Nexus instance with a transitory dataset on zpool-a. If zpool-a + /// is in a degraded state and considered not-in-service, + /// `OmicronZonePlacement` will be told by the planner that the sled has 1 + /// zpool. Our simple check of "at most one Nexus per zpool" would + /// erroneously fail to realize we could still add a Nexus (backed by + /// zpool-b), and would claim that the sled already has a Nexus for each + /// zpool. + /// + /// We punt on this problem for multiple reasons: + /// + /// 1. It's overly conservative; if we get into this state, we may refuse to + /// start services when we ought to be able to, but this isn't the worst + /// failure mode. In practice we should have far more options for + /// placement than we need for any of our control plane services, so + /// skipping a sled in this state should be fine. + /// 2. We don't yet track transitory datasets, so even if we wanted to know + /// which zpool Nexus was using (in the above example), we can't. + /// 3. We don't (yet?) have a way for a zpool to be present, backing a zone, + /// and not considered to be in service. The only zpools that aren't in + /// service belong to expunged disks, which can't be backing live + /// services. 
+ pub(super) fn place_zone( + &mut self, + zone_kind: DiscretionaryOmicronZone, + ) -> Result { + self.sleds.ensure_ordered_by(zone_kind); + + let mut sleds_skipped = Vec::new(); + let mut chosen_sled = None; + while let Some(sled) = self.sleds.pop() { + // Ensure we have at least one zpool more than the number of + // `zone_kind` zones already placed on this sled. If we don't, we + // already have a zone of this kind on each zpool, so we'll skip + // this sled. + if sled + .discretionary_zones + .iter() + .filter(|&&z| z == zone_kind) + .count() + < sled.num_zpools + { + chosen_sled = Some(sled); + break; + } else { + sleds_skipped.push(sled); + } + } + + // Push any skipped sleds back onto our heap. + for sled in sleds_skipped { + self.sleds.push(sled); + } + + let mut sled = + chosen_sled.ok_or(PlacementError::NoSledsEligible { zone_kind })?; + let sled_id = sled.sled_id; + + // Update our internal state so future `place_zone` calls take the new + // zone we just placed into account. + sled.discretionary_zones.push(zone_kind); + self.sleds.push(sled); + + Ok(sled_id) + } +} + +// Wrapper around a binary heap that allows us to change the ordering at runtime +// (so we can sort for particular types of zones to place). +#[derive(Debug, Clone)] +struct OrderedSleds { + // The current zone type we're sorted to place. We maintain the invariant + // that every element of `heap` has the same `ordered_by` value as this + // field's current value. + ordered_by: DiscretionaryOmicronZone, + heap: BinaryHeap, +} + +impl OrderedSleds { + fn new( + ordered_by: DiscretionaryOmicronZone, + sleds: impl Iterator, + ) -> Self { + Self { + ordered_by, + heap: sleds + .map(|sled| OrderedSledState { ordered_by, sled }) + .collect(), + } + } + + fn ensure_ordered_by(&mut self, ordered_by: DiscretionaryOmicronZone) { + if self.ordered_by == ordered_by { + return; + } + + // Rebuild our heap, sorting by a new zone kind, and maintaining the + // invariant that all our heap members have the same `ordered_by` value + // as we do. + let mut sleds = mem::take(&mut self.heap).into_vec(); + for s in &mut sleds { + s.ordered_by = ordered_by; + } + self.heap = BinaryHeap::from(sleds); + self.ordered_by = ordered_by; + } + + fn pop(&mut self) -> Option { + self.heap.pop().map(|ordered| ordered.sled) + } + + fn push(&mut self, sled: OmicronZonePlacementSledState) { + self.heap.push(OrderedSledState { ordered_by: self.ordered_by, sled }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct OrderedSledState { + ordered_by: DiscretionaryOmicronZone, + sled: OmicronZonePlacementSledState, +} + +impl Ord for OrderedSledState { + fn cmp(&self, other: &Self) -> Ordering { + // Invariant: We should never compare other entries with a different + // `ordered_by`. This is enforced by `OrderedSleds`. + assert_eq!(self.ordered_by, other.ordered_by); + + // Count how many zones of our ordering type are in each side. + let our_zones_of_interest = self + .sled + .discretionary_zones + .iter() + .filter(|&&z| z == self.ordered_by) + .count(); + let other_zones_of_interest = other + .sled + .discretionary_zones + .iter() + .filter(|&&z| z == self.ordered_by) + .count(); + + // BinaryHeap is a max heap, and we want to be on the top of the heap if + // we have fewer zones of interest, so reverse the comparisons below. 
+ our_zones_of_interest + .cmp(&other_zones_of_interest) + .reverse() + // If the zones of interest count is equal, we tiebreak by total + // discretionary zones, again reversing the order for our max heap + // to prioritize sleds with fewer total discretionary zones. + .then_with(|| { + self.sled + .discretionary_zones + .len() + .cmp(&other.sled.discretionary_zones.len()) + .reverse() + }) + // If we're still tied, tiebreak by sorting on sled ID for + // determinism. + .then_with(|| self.sled.sled_id.cmp(&other.sled.sled_id)) + } +} + +impl PartialOrd for OrderedSledState { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +#[cfg(test)] +pub mod test { + use super::*; + use omicron_uuid_kinds::GenericUuid; + use proptest::arbitrary::any; + use proptest::collection::btree_map; + use proptest::sample::size_range; + use std::collections::BTreeMap; + use test_strategy::proptest; + use test_strategy::Arbitrary; + use uuid::Uuid; + + #[derive(Debug, Clone, Arbitrary)] + struct ZonesToPlace { + #[any(size_range(0..8).lift())] + zones: Vec, + } + + #[derive(Debug, Clone, Arbitrary)] + struct ExistingSled { + zones: ZonesToPlace, + #[strategy(0_usize..8)] + num_zpools: usize, + } + + #[derive(Debug, Arbitrary)] + struct ArbitraryTestInput { + #[strategy(btree_map(any::<[u8; 16]>(), any::(), 1..8))] + existing_sleds: BTreeMap<[u8; 16], ExistingSled>, + zones_to_place: ZonesToPlace, + } + + #[derive(Debug)] + struct TestInput { + state: TestState, + zones_to_place: Vec, + } + + impl From for TestInput { + fn from(input: ArbitraryTestInput) -> Self { + let mut sleds = BTreeMap::new(); + for (&raw_id, existing_sled) in input.existing_sleds.iter() { + let sled_id = + SledUuid::from_untyped_uuid(Uuid::from_bytes(raw_id)); + sleds.insert( + sled_id, + TestSledState { + zones: existing_sled.zones.zones.clone(), + num_zpools: existing_sled.num_zpools, + }, + ); + } + let state = TestState { sleds }; + Self { state, zones_to_place: input.zones_to_place.zones } + } + } + + #[derive(Debug)] + struct TestSledState { + zones: Vec, + num_zpools: usize, + } + + impl TestSledState { + fn count_zones_of_kind(&self, kind: DiscretionaryOmicronZone) -> usize { + self.zones.iter().filter(|&&k| k == kind).count() + } + } + + #[derive(Debug)] + struct TestState { + sleds: BTreeMap, + } + + impl TestState { + fn validate_sled_can_support_another_zone_of_kind( + &self, + sled_id: SledUuid, + kind: DiscretionaryOmicronZone, + ) -> Result<(), String> { + let sled_state = self.sleds.get(&sled_id).expect("valid sled_id"); + let existing_zones = sled_state.count_zones_of_kind(kind); + if existing_zones < sled_state.num_zpools { + Ok(()) + } else { + Err(format!( + "already have {existing_zones} \ + {kind:?} instances but only {} zpools", + sled_state.num_zpools + )) + } + } + + fn validate_placement( + &mut self, + sled_id: SledUuid, + kind: DiscretionaryOmicronZone, + ) -> Result<(), String> { + // Ensure this sled is eligible for this kind at all: We have to + // have at least one disk on which we can put the dataset for this + // zone that isn't already holding another zone of this same kind + // (i.e., at most one zone of any given kind per disk per sled). 
+ self.validate_sled_can_support_another_zone_of_kind(sled_id, kind)?; + + let sled_state = self.sleds.get(&sled_id).expect("valid sled_id"); + let existing_zones = sled_state.count_zones_of_kind(kind); + + // Ensure this sled is (at least tied for) the best choice for this + // kind: it should have the minimum number of existing zones of this + // kind, and of all sleds tied for the minimum, it should have the + // fewest total discretionary services. + for (&other_sled_id, other_sled_state) in &self.sleds { + // Ignore other sleds that can't run another zone of `kind`. + if self + .validate_sled_can_support_another_zone_of_kind( + other_sled_id, + kind, + ) + .is_err() + { + continue; + } + + let other_zone_count = + other_sled_state.count_zones_of_kind(kind); + if other_zone_count < existing_zones { + return Err(format!( + "sled {other_sled_id} would be a better choice \ + (fewer existing {kind:?} instances: \ + {other_zone_count} < {existing_zones})" + )); + } + if other_zone_count == existing_zones + && other_sled_state.zones.len() < sled_state.zones.len() + { + return Err(format!( + "sled {other_sled_id} would be a better choice \ + (same number of existing {kind:?} instances, but \ + fewer total discretionary services: {} < {})", + other_sled_state.zones.len(), + sled_state.zones.len(), + )); + } + } + + // This placement is valid: update our state. + self.sleds.get_mut(&sled_id).unwrap().zones.push(kind); + Ok(()) + } + + fn validate_no_placement_possible( + &self, + kind: DiscretionaryOmicronZone, + ) -> Result<(), String> { + // Zones should be placeable unless every sled already has a zone of + // this kind on every disk. + for (sled_id, sled_state) in self.sleds.iter() { + if sled_state.count_zones_of_kind(kind) < sled_state.num_zpools + { + return Err(format!( + "sled {sled_id} is eligible for {kind:?} placement" + )); + } + } + Ok(()) + } + } + + #[proptest] + fn test_place_omicron_zones(input: ArbitraryTestInput) { + let mut input = TestInput::from(input); + + let mut placer = + OmicronZonePlacement::new(input.state.sleds.iter().map( + |(&sled_id, sled_state)| OmicronZonePlacementSledState { + sled_id, + num_zpools: sled_state.num_zpools, + discretionary_zones: sled_state.zones.clone(), + }, + )); + + for z in input.zones_to_place { + println!("placing {z:?}"); + match placer.place_zone(z) { + Ok(sled_id) => { + input + .state + .validate_placement(sled_id, z) + .expect("valid placement"); + } + Err(PlacementError::NoSledsEligible { zone_kind }) => { + assert_eq!(zone_kind, z); + input + .state + .validate_no_placement_possible(z) + .expect("no placement possible"); + } + } + } + } +} diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 6210b0dd3e..ea053a2a36 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -22,6 +22,7 @@ use super::region_replacement; use super::service_firewall_rules; use super::sync_service_zone_nat::ServiceZoneNatTracker; use super::sync_switch_configuration::SwitchPortSettingsManager; +use super::v2p_mappings::V2PManager; use super::vpc_routes; use crate::app::oximeter::PRODUCER_LEASE_DURATION; use crate::app::sagas::SagaRequest; @@ -91,6 +92,9 @@ pub struct BackgroundTasks { /// task handle for the switch port settings manager pub task_switch_port_settings_manager: common::TaskHandle, + /// task handle for the opte v2p manager + pub task_v2p_manager: common::TaskHandle, + /// task handle for the task that detects if regions need replacement and /// begins the process pub 
task_region_replacement: common::TaskHandle, @@ -117,6 +121,10 @@ impl BackgroundTasks { nexus_id: Uuid, resolver: internal_dns::resolver::Resolver, saga_request: Sender, + v2p_watcher: ( + tokio::sync::watch::Sender<()>, + tokio::sync::watch::Receiver<()>, + ), producer_registry: &ProducerRegistry, ) -> BackgroundTasks { let mut driver = common::Driver::new(); @@ -336,6 +344,17 @@ impl BackgroundTasks { ) }; + let task_v2p_manager = { + driver.register( + "v2p_manager".to_string(), + String::from("manages opte v2p mappings for vpc networking"), + config.v2p_mapping_propagation.period_secs, + Box::new(V2PManager::new(datastore.clone())), + opctx.child(BTreeMap::new()), + vec![Box::new(v2p_watcher.1)], + ) + }; + // Background task: detect if a region needs replacement and begin the // process let task_region_replacement = { @@ -362,6 +381,7 @@ impl BackgroundTasks { resolver.clone(), producer_registry, instance_watcher::WatcherIdentity { nexus_id, rack_id }, + v2p_watcher.0, ); driver.register( "instance_watcher".to_string(), @@ -418,6 +438,7 @@ impl BackgroundTasks { task_blueprint_executor, task_service_zone_nat_tracker, task_switch_port_settings_manager, + task_v2p_manager, task_region_replacement, task_instance_watcher, task_service_firewall_propagation, diff --git a/nexus/src/app/background/instance_watcher.rs b/nexus/src/app/background/instance_watcher.rs index 4cdca3c4b7..d473ea8e99 100644 --- a/nexus/src/app/background/instance_watcher.rs +++ b/nexus/src/app/background/instance_watcher.rs @@ -35,6 +35,7 @@ pub(crate) struct InstanceWatcher { resolver: internal_dns::resolver::Resolver, metrics: Arc>, id: WatcherIdentity, + v2p_notification_tx: tokio::sync::watch::Sender<()>, } const MAX_SLED_AGENTS: NonZeroU32 = unsafe { @@ -48,12 +49,13 @@ impl InstanceWatcher { resolver: internal_dns::resolver::Resolver, producer_registry: &ProducerRegistry, id: WatcherIdentity, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Self { let metrics = Arc::new(Mutex::new(metrics::Metrics::default())); producer_registry .register_producer(metrics::Producer(metrics.clone())) .unwrap(); - Self { datastore, resolver, metrics, id } + Self { datastore, resolver, metrics, id, v2p_notification_tx } } fn check_instance( @@ -73,6 +75,7 @@ impl InstanceWatcher { .collect(), ); let client = client.clone(); + let v2p_notification_tx = self.v2p_notification_tx.clone(); async move { slog::trace!(opctx.log, "checking on instance..."); @@ -153,6 +156,7 @@ impl InstanceWatcher { &opctx.log, &target.instance_id, &new_runtime_state, + v2p_notification_tx, ) .await .map_err(|e| { diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index db1d23f221..6d27a069af 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -25,6 +25,7 @@ mod service_firewall_rules; mod status; mod sync_service_zone_nat; mod sync_switch_configuration; +mod v2p_mappings; mod vpc_routes; pub use init::BackgroundTasks; diff --git a/nexus/src/app/background/sync_switch_configuration.rs b/nexus/src/app/background/sync_switch_configuration.rs index dc7aa74576..7efe9ef92b 100644 --- a/nexus/src/app/background/sync_switch_configuration.rs +++ b/nexus/src/app/background/sync_switch_configuration.rs @@ -551,7 +551,8 @@ impl BackgroundTask for SwitchPortSettingsManager { // Same thing as above, check to see if we've already built the announce set, // if so we'll skip this step - if bgp_announce_prefixes.get(&bgp_config.bgp_announce_set_id).is_none() { + #[allow(clippy::map_entry)] + if 
!bgp_announce_prefixes.contains_key(&bgp_config.bgp_announce_set_id) { let announcements = match self .datastore .bgp_announce_list( diff --git a/nexus/src/app/background/v2p_mappings.rs b/nexus/src/app/background/v2p_mappings.rs new file mode 100644 index 0000000000..a53ac3442f --- /dev/null +++ b/nexus/src/app/background/v2p_mappings.rs @@ -0,0 +1,165 @@ +use std::{collections::HashSet, sync::Arc}; + +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::{Sled, SledState}; +use nexus_db_queries::{context::OpContext, db::DataStore}; +use nexus_networking::sled_client_from_address; +use nexus_types::{ + deployment::SledFilter, external_api::views::SledPolicy, identity::Asset, +}; +use omicron_common::api::external::Vni; +use serde_json::json; +use sled_agent_client::types::VirtualNetworkInterfaceHost; + +use super::common::BackgroundTask; + +pub struct V2PManager { + datastore: Arc, +} + +impl V2PManager { + pub fn new(datastore: Arc) -> Self { + Self { datastore } + } +} + +impl BackgroundTask for V2PManager { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + let log = opctx.log.clone(); + + async move { + // Get the v2p mappings + let v2p_mappings = match self.datastore.v2p_mappings(opctx).await { + Ok(v) => v, + Err(e) => { + let msg = format!("failed to list v2p mappings: {:#}", e); + error!(&log, "{msg}"); + return json!({"error": msg}); + } + }; + + // Get sleds + // we only care about sleds that are active && inservice + let sleds = match self.datastore.sled_list_all_batched(opctx, SledFilter::InService).await + { + Ok(v) => v, + Err(e) => { + let msg = format!("failed to enumerate sleds: {:#}", e); + error!(&log, "{msg}"); + return json!({"error": msg}); + } + } + .into_iter() + .filter(|sled| { + matches!(sled.state(), SledState::Active) + && matches!(sled.policy(), SledPolicy::InService { .. }) + }); + + // Map sled db records to sled-agent clients + let sled_clients: Vec<(Sled, sled_agent_client::Client)> = sleds + .map(|sled| { + let client = sled_client_from_address( + sled.id(), + sled.address(), + &log, + ); + (sled, client) + }) + .collect(); + + // create a set of updates from the v2p mappings + let desired_v2p: HashSet<_> = v2p_mappings + .into_iter() + .filter_map(|mapping| { + let physical_host_ip = match mapping.sled_ip.ip() { + std::net::IpAddr::V4(v) => { + // sled ip should never be ipv4 + error!( + &log, + "sled ip should be ipv6 but is ipv4: {v}" + ); + return None; + } + std::net::IpAddr::V6(v) => v, + }; + + let vni = mapping.vni.0; + + let mapping = VirtualNetworkInterfaceHost { + virtual_ip: mapping.ip.ip(), + virtual_mac: *mapping.mac, + physical_host_ip, + vni, + }; + Some(mapping) + }) + .collect(); + + for (sled, client) in sled_clients { + // + // Get the current mappings on each sled + // Ignore vopte interfaces that are used for services. Service zones only need + // an opte interface for external communication. For services zones, intra-sled + // communication is facilitated via zone underlay interfaces / addresses, + // not opte interfaces / v2p mappings. 
+ // + let found_v2p: HashSet = match client.list_v2p().await { + Ok(v) => v.into_inner(), + Err(e) => { + error!( + &log, + "unable to list opte v2p mappings for sled"; + "sled" => sled.serial_number(), + "error" => ?e + ); + continue; + } + }.into_iter().filter(|vnic| vnic.vni != Vni::SERVICES_VNI).collect(); + + info!(&log, "found opte v2p mappings"; "sled" => sled.serial_number(), "interfaces" => ?found_v2p); + + let v2p_to_add: Vec<_> = desired_v2p.difference(&found_v2p).collect(); + + let v2p_to_del: Vec<_> = found_v2p.difference(&desired_v2p).collect(); + + // + // Generally, we delete stale entries before adding new entries in RPWs to prevent stale entries + // from causing a conflict with an incoming entry. In the case of opte it doesn't matter which + // order we perform the next two steps in, since conflicting stale entries are overwritten by the + // incoming entries. + // + info!(&log, "v2p mappings to delete"; "sled" => sled.serial_number(), "mappings" => ?v2p_to_del); + for mapping in v2p_to_del { + if let Err(e) = client.del_v2p(&mapping).await { + error!( + &log, + "failed to delete v2p mapping from sled"; + "sled" => sled.serial_number(), + "mapping" => ?mapping, + "error" => ?e, + ); + } + } + + info!(&log, "v2p mappings to add"; "sled" => sled.serial_number(), "mappings" => ?v2p_to_add); + for mapping in v2p_to_add { + if let Err(e) = client.set_v2p(mapping).await { + error!( + &log, + "failed to add v2p mapping to sled"; + "sled" => sled.serial_number(), + "mapping" => ?mapping, + "error" => ?e, + ); + } + } + } + json!({}) + } + .boxed() + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 090caddf18..5874316fcd 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1515,13 +1515,14 @@ impl super::Nexus { new_runtime_state: &nexus::SledInstanceState, ) -> Result<(), Error> { notify_instance_updated( - &self.db_datastore, + &self.datastore(), &self.resolver().await, &self.opctx_alloc, opctx, &self.log, instance_id, new_runtime_state, + self.v2p_notification_tx.clone(), ) .await?; self.vpc_needed_notify_sleds(); @@ -1966,6 +1967,7 @@ pub(crate) struct InstanceUpdated { /// Invoked by a sled agent to publish an updated runtime state for an /// Instance. 
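Stepping back to the new v2p_mappings.rs background task above: its core is a per-sled reconciliation pass that compares the desired set of mappings (derived from the database) against the set the sled actually reports, deletes stale entries, and adds missing ones. The sketch below shows only that set-difference shape; the `Mapping` type is an illustrative stand-in, not the real `VirtualNetworkInterfaceHost` or sled-agent client.

```rust
use std::collections::HashSet;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};

// Illustrative stand-in for VirtualNetworkInterfaceHost.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct Mapping {
    virtual_ip: IpAddr,
    physical_host_ip: Ipv6Addr,
    vni: u32,
}

// Compute what to delete and what to add for one sled. Both del_v2p and
// set_v2p are idempotent on the sled, so re-running this pass is safe, and
// the delete/add ordering does not matter because incoming entries simply
// overwrite conflicting stale ones.
fn reconcile(
    desired: &HashSet<Mapping>,
    found: &HashSet<Mapping>,
) -> (Vec<Mapping>, Vec<Mapping>) {
    let to_del = found.difference(desired).cloned().collect();
    let to_add = desired.difference(found).cloned().collect();
    (to_del, to_add)
}

fn main() {
    let stale = Mapping {
        virtual_ip: IpAddr::V4(Ipv4Addr::new(172, 30, 0, 5)),
        physical_host_ip: Ipv6Addr::LOCALHOST,
        vni: 100,
    };
    let wanted = Mapping { vni: 101, ..stale.clone() };
    let desired: HashSet<_> = [wanted].into_iter().collect();
    let found: HashSet<_> = [stale].into_iter().collect();
    let (to_del, to_add) = reconcile(&desired, &found);
    println!("delete {to_del:?}, add {to_add:?}");
}
```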
+#[allow(clippy::too_many_arguments)] // :( pub(crate) async fn notify_instance_updated( datastore: &DataStore, resolver: &internal_dns::resolver::Resolver, @@ -1974,6 +1976,7 @@ pub(crate) async fn notify_instance_updated( log: &slog::Logger, instance_id: &Uuid, new_runtime_state: &nexus::SledInstanceState, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Result, Error> { let propolis_id = new_runtime_state.propolis_id; @@ -2012,6 +2015,7 @@ pub(crate) async fn notify_instance_updated( &authz_instance, db_instance.runtime(), &new_runtime_state.instance_state, + v2p_notification_tx.clone(), ) .await?; diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 30bea98cc6..de4de492e0 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -15,24 +15,20 @@ use nexus_db_model::Vni as DbVni; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; -use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; -use nexus_types::deployment::SledFilter; -use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::Ipv4Net; use omicron_common::api::external::Ipv6Net; use omicron_common::api::internal::nexus; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::SwitchLocation; -use omicron_common::retry_until_known_result; -use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost; -use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use std::collections::HashSet; use std::str::FromStr; use uuid::Uuid; +use super::background::BackgroundTasks; + impl super::Nexus { /// Returns the set of switches with uplinks configured and boundary /// services enabled. @@ -43,41 +39,6 @@ impl super::Nexus { boundary_switches(&self.db_datastore, opctx).await } - /// Ensures that V2P mappings exist that indicate that the instance with ID - /// `instance_id` is resident on the sled with ID `sled_id`. - pub(crate) async fn create_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_id: Uuid, - ) -> Result<(), Error> { - create_instance_v2p_mappings( - &self.db_datastore, - &self.log, - opctx, - &self.opctx_alloc, - instance_id, - sled_id, - ) - .await - } - - /// Ensure that the necessary v2p mappings for an instance are deleted - pub(crate) async fn delete_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - ) -> Result<(), Error> { - delete_instance_v2p_mappings( - &self.db_datastore, - &self.log, - opctx, - &self.opctx_alloc, - instance_id, - ) - .await - } - /// Ensures that the Dendrite configuration for the supplied instance is /// up-to-date. 
/// @@ -239,6 +200,7 @@ impl super::Nexus { opctx, &self.opctx_alloc, probe_id, + &self.background_tasks, ) .await } @@ -303,6 +265,7 @@ pub(crate) async fn ensure_updated_instance_network_config( authz_instance: &authz::Instance, prev_instance_state: &db::model::InstanceRuntimeState, new_instance_state: &nexus::InstanceRuntimeState, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Result<(), Error> { let instance_id = authz_instance.id(); @@ -333,6 +296,7 @@ pub(crate) async fn ensure_updated_instance_network_config( opctx, opctx_alloc, authz_instance, + v2p_notification_tx, ) .await?; return Ok(()); @@ -412,15 +376,13 @@ pub(crate) async fn ensure_updated_instance_network_config( Err(e) => return Err(e), }; - create_instance_v2p_mappings( - datastore, - log, - opctx, - opctx_alloc, - instance_id, - new_sled_id, - ) - .await?; + if let Err(e) = v2p_notification_tx.send(()) { + error!( + log, + "error notifying background task of v2p change"; + "error" => ?e + ) + }; let (.., sled) = LookupPath::new(opctx, datastore).sled_id(new_sled_id).fetch().await?; @@ -735,20 +697,19 @@ pub(crate) async fn probe_ensure_dpd_config( async fn clear_instance_networking_state( datastore: &DataStore, log: &slog::Logger, - resolver: &internal_dns::resolver::Resolver, opctx: &OpContext, opctx_alloc: &OpContext, authz_instance: &authz::Instance, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Result<(), Error> { - delete_instance_v2p_mappings( - datastore, - log, - opctx, - opctx_alloc, - authz_instance.id(), - ) - .await?; + if let Err(e) = v2p_notification_tx.send(()) { + error!( + log, + "error notifying background task of v2p change"; + "error" => ?e + ) + }; instance_delete_dpd_config( datastore, @@ -771,253 +732,6 @@ async fn clear_instance_networking_state( .await } -/// Ensures that V2P mappings exist that indicate that the instance with ID -/// `instance_id` is resident on the sled with ID `sled_id`. -pub(crate) async fn create_instance_v2p_mappings( - datastore: &DataStore, - log: &slog::Logger, - opctx: &OpContext, - opctx_alloc: &OpContext, - instance_id: Uuid, - sled_id: Uuid, -) -> Result<(), Error> { - info!(log, "creating V2P mappings for instance"; - "instance_id" => %instance_id, - "sled_id" => %sled_id); - - // For every sled that isn't the sled this instance was allocated to, create - // a virtual to physical mapping for each of this instance's NICs. - // - // For the mappings to be correct, a few invariants must hold: - // - // - mappings must be set whenever an instance's sled changes (eg. - // during instance creation, migration, stop + start) - // - // - an instances' sled must not change while its corresponding mappings - // are being created - // - // - the same mapping creation must be broadcast to all sleds - // - // A more targeted approach would be to see what other instances share - // the VPC this instance is in (or more generally, what instances should - // have connectivity to this one), see what sleds those are allocated - // to, and only create V2P mappings for those sleds. - // - // There's additional work with this approach: - // - // - it means that delete calls are required as well as set calls, - // meaning that now the ordering of those matters (this may also - // necessitate a generation number for V2P mappings) - // - // - V2P mappings have to be bidirectional in order for both instances's - // packets to make a round trip. 
This isn't a problem with the - // broadcast approach because one of the sides will exist already, but - // it is something to orchestrate with a more targeted approach. - // - // TODO-correctness Default firewall rules currently will block - // instances in different VPCs from connecting to each other. If it ever - // stops doing this, the broadcast approach will create V2P mappings - // that shouldn't exist. - let (.., authz_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::Read) - .await?; - - let instance_nics = datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - // Look up the supplied sled's physical host IP. - let physical_host_ip = - nexus_networking::sled_lookup(&datastore, &opctx_alloc, sled_id)? - .fetch() - .await? - .1 - .ip - .into(); - - let mut last_sled_id: Option = None; - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = datastore - // XXX: InService might not be exactly correct - .sled_list(&opctx_alloc, &pagparams, SledFilter::InService) - .await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - // set_v2p not required for sled instance was allocated to, OPTE - // currently does that automatically - // - // TODO(#3107): Remove this when XDE stops creating mappings - // implicitly. - if sled.id() == sled_id { - continue; - } - - for nic in &instance_nics { - let client = nexus_networking::sled_client( - datastore, - opctx_alloc, - sled.id(), - log, - ) - .await?; - let nic_id = nic.id; - let mapping = SetVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - virtual_mac: nic.mac, - physical_host_ip, - vni: nic.vni, - }; - - let log = log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.set_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? - .await; - - if result.is_err() { - error!(log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) -} - -/// Ensure that the necessary v2p mappings for an instance are deleted -pub(crate) async fn delete_instance_v2p_mappings( - datastore: &DataStore, - log: &slog::Logger, - opctx: &OpContext, - opctx_alloc: &OpContext, - instance_id: Uuid, -) -> Result<(), Error> { - // For every sled that isn't the sled this instance was allocated to, delete - // the virtual to physical mapping for each of this instance's NICs. If - // there isn't a V2P mapping, del_v2p should be a no-op. 
- let (.., authz_instance) = LookupPath::new(&opctx, datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::Read) - .await?; - - let instance_nics = datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - let mut last_sled_id: Option = None; - - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = datastore - // XXX: InService might not be exactly correct - .sled_list(&opctx_alloc, &pagparams, SledFilter::InService) - .await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - for nic in &instance_nics { - let client = nexus_networking::sled_client( - &datastore, - &opctx_alloc, - sled.id(), - &log, - ) - .await?; - let nic_id = nic.id; - let mapping = DeleteVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - vni: nic.vni, - }; - - let log = log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.del_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? - .await; - - if result.is_err() { - error!(log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) -} - /// Attempts to delete all of the Dendrite NAT configuration for the /// instance identified by `authz_instance`. /// @@ -1083,6 +797,7 @@ pub(crate) async fn probe_delete_dpd_config( opctx: &OpContext, opctx_alloc: &OpContext, probe_id: Uuid, + background_tasks: &BackgroundTasks, ) -> Result<(), Error> { info!(log, "deleting probe dpd configuration"; "probe_id" => %probe_id); @@ -1139,6 +854,7 @@ pub(crate) async fn probe_delete_dpd_config( } }; + background_tasks.activate(&background_tasks.task_v2p_manager); // Notify dendrite that there are changes for it to reconcile. // In the event of a failure to notify dendrite, we'll log an error // and rely on dendrite's RPW timer to catch it up. 
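One way to read the instance_network.rs changes above: instead of Nexus fanning v2p RPCs out to every sled inline (the deleted create_instance_v2p_mappings / delete_instance_v2p_mappings functions), callers now just nudge the v2p_manager background task over a tokio watch channel, and the task reconciles on its own schedule. A minimal sketch of that wake-up pattern, with invented task contents:

```rust
use tokio::sync::watch;

#[tokio::main]
async fn main() {
    // One watch channel shared between the notifier (Nexus code paths) and
    // the background task (its activation source).
    let (tx, mut rx) = watch::channel(());

    // Background task side: wake up whenever a notification arrives.
    let worker = tokio::spawn(async move {
        while rx.changed().await.is_ok() {
            // In the real task this is where a v2p reconciliation pass runs.
            println!("activation requested; reconciling v2p mappings");
        }
    });

    // Notifier side: sending is cheap, notifications coalesce, and a send
    // error only means the receiving task has gone away, so it is logged
    // rather than propagated.
    if let Err(e) = tx.send(()) {
        eprintln!("error notifying background task of v2p change: {e:?}");
    }

    // Dropping the sender closes the channel and lets the worker exit.
    drop(tx);
    worker.await.unwrap();
}
```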
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 4b77788c96..3083a8e761 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -200,6 +200,9 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, + + /// Channel for notifying background task of change to opte v2p state + v2p_notification_tx: tokio::sync::watch::Sender<()>, } impl Nexus { @@ -390,6 +393,8 @@ impl Nexus { Arc::clone(&db_datastore), ); + let v2p_watcher_channel = tokio::sync::watch::channel(()); + let (saga_request, mut saga_request_recv) = SagaRequest::channel(); let background_tasks = background::BackgroundTasks::start( @@ -400,6 +405,7 @@ impl Nexus { config.deployment.id, resolver.clone(), saga_request, + v2p_watcher_channel.clone(), producer_registry, ); @@ -453,6 +459,7 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), + v2p_notification_tx: v2p_watcher_channel.0, }; // TODO-cleanup all the extra Arcs here seems wrong diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index a6df7183d1..a6771f65a0 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1287,9 +1287,7 @@ pub mod test { assert!(no_instances_or_disks_on_sled(&sled_agent).await); let v2p_mappings = &*sled_agent.v2p_mappings.lock().await; - for (_nic_id, mappings) in v2p_mappings { - assert!(mappings.is_empty()); - } + assert!(v2p_mappings.is_empty()); } #[nexus_test(server = crate::Server)] diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index d93c1455ad..b6fedc175d 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -102,6 +102,7 @@ async fn sid_delete_network_interfaces( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); + let nexus = osagactx.nexus(); let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, @@ -112,6 +113,7 @@ async fn sid_delete_network_interfaces( .instance_delete_all_network_interfaces(&opctx, ¶ms.authz_instance) .await .map_err(ActionError::action_failed)?; + nexus.background_tasks.activate(&nexus.background_tasks.task_v2p_manager); Ok(()) } diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index b76bc2e37d..e7caedfc9c 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -447,50 +447,18 @@ async fn sis_dpd_ensure_undo( async fn sis_v2p_ensure( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - let instance_id = params.db_instance.id(); - - info!(osagactx.log(), "start saga: ensuring v2p mappings are configured"; - "instance_id" => %instance_id); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let sled_uuid = sagactx.lookup::("sled_id")?; - osagactx - .nexus() - .create_instance_v2p_mappings(&opctx, instance_id, sled_uuid) - .await - .map_err(ActionError::action_failed)?; - + let nexus = osagactx.nexus(); + nexus.background_tasks.activate(&nexus.background_tasks.task_v2p_manager); Ok(()) } async fn sis_v2p_ensure_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { - let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - let instance_id = params.db_instance.id(); - let sled_id = 
sagactx.lookup::("sled_id")?; - info!(osagactx.log(), "start saga: undoing v2p configuration"; - "instance_id" => %instance_id, - "sled_id" => %sled_id); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - osagactx - .nexus() - .delete_instance_v2p_mappings(&opctx, instance_id) - .await - .map_err(ActionError::action_failed)?; - + let nexus = osagactx.nexus(); + nexus.background_tasks.activate(&nexus.background_tasks.task_v2p_manager); Ok(()) } diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 2a5deeff51..287571cfd5 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1572,12 +1572,9 @@ fn create_snapshot_from_disk( if let Some(socket_map) = socket_map { for target in &mut opts.target { - *target = socket_map - .get(target) - .ok_or_else(|| { - anyhow!("target {} not found in map!", target) - })? - .clone(); + target.clone_from(socket_map.get(target).ok_or_else( + || anyhow!("target {} not found in map!", target), + )?); } } diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 25a6d97efc..49a61cfa36 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -111,6 +111,7 @@ switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 7ad52b9919..51e2552e85 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -66,6 +66,7 @@ use omicron_nexus::app::MIN_MEMORY_BYTES_PER_INSTANCE; use omicron_nexus::Nexus; use omicron_nexus::TestInterfaces as _; use omicron_sled_agent::sim::SledAgent; +use omicron_test_utils::dev::poll::wait_for_condition; use sled_agent_client::TestInterfaces as _; use std::convert::TryFrom; use std::net::Ipv4Addr; @@ -660,14 +661,6 @@ async fn test_instance_start_creates_networking_state( .await .unwrap(); - let instance_state = datastore - .instance_fetch_with_vmm(&opctx, &authz_instance) - .await - .unwrap(); - - let sled_id = - instance_state.sled_id().expect("running instance should have a sled"); - let guest_nics = datastore .derive_guest_network_interface_info(&opctx, &authz_instance) .await @@ -675,13 +668,7 @@ async fn test_instance_start_creates_networking_state( assert_eq!(guest_nics.len(), 1); for agent in &sled_agents { - // TODO(#3107) Remove this bifurcation when Nexus programs all mappings - // itself. - if agent.id != sled_id { - assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; - } else { - assert!(agent.v2p_mappings.lock().await.is_empty()); - } + assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; } } @@ -861,24 +848,7 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { let mut sled_agents = vec![cptestctx.sled_agent.sled_agent.clone()]; sled_agents.extend(other_sleds.iter().map(|tup| tup.1.sled_agent.clone())); for sled_agent in &sled_agents { - // Starting the instance should have programmed V2P mappings to all the - // sleds except the one where the instance is running. 
- // - // TODO(#3107): In practice, the instance's sled also has V2P mappings, but - // these are established during VMM setup (i.e. as part of creating the - // instance's OPTE ports) instead of being established by explicit calls - // from Nexus. Simulated sled agent handles the latter calls but does - // not currently update any mappings during simulated instance creation, - // so the check below verifies that no mappings exist on the instance's - // own sled instead of checking for a real mapping. Once Nexus programs - // all mappings explicitly (without skipping the instance's current - // sled) this bifurcation should be removed. - if sled_agent.id != original_sled_id { - assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) - .await; - } else { - assert!(sled_agent.v2p_mappings.lock().await.is_empty()); - } + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni).await; } let dst_sled_id = if original_sled_id == cptestctx.sled_agent.sled_agent.id @@ -4545,14 +4515,6 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let instance_state = datastore - .instance_fetch_with_vmm(&opctx, &authz_instance) - .await - .unwrap(); - - let sled_id = - instance_state.sled_id().expect("running instance should have a sled"); - let guest_nics = datastore .derive_guest_network_interface_info(&opctx, &authz_instance) .await @@ -4565,14 +4527,7 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { sled_agents.push(&cptestctx.sled_agent.sled_agent); for sled_agent in &sled_agents { - // TODO(#3107) Remove this bifurcation when Nexus programs all mappings - // itself. - if sled_agent.id != sled_id { - assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) - .await; - } else { - assert!(sled_agent.v2p_mappings.lock().await.is_empty()); - } + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni).await; } // Delete the instance @@ -4589,8 +4544,21 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { // Validate that every sled no longer has the V2P mapping for this instance for sled_agent in &sled_agents { - let v2p_mappings = sled_agent.v2p_mappings.lock().await; - assert!(v2p_mappings.is_empty()); + let condition = || async { + let v2p_mappings = sled_agent.v2p_mappings.lock().await; + if v2p_mappings.is_empty() { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }; + wait_for_condition( + condition, + &Duration::from_secs(1), + &Duration::from_secs(30), + ) + .await + .expect("v2p mappings should be empty"); } } @@ -4687,14 +4655,28 @@ async fn assert_sled_v2p_mappings( nic: &InstanceNetworkInterface, vni: Vni, ) { - let v2p_mappings = sled_agent.v2p_mappings.lock().await; - assert!(!v2p_mappings.is_empty()); - - let mapping = v2p_mappings.get(&nic.identity.id).unwrap().last().unwrap(); - assert_eq!(mapping.virtual_ip, nic.ip); - assert_eq!(mapping.virtual_mac, nic.mac); - assert_eq!(mapping.physical_host_ip, sled_agent.ip); - assert_eq!(mapping.vni, vni); + let condition = || async { + let v2p_mappings = sled_agent.v2p_mappings.lock().await; + let mapping = v2p_mappings.iter().find(|mapping| { + mapping.virtual_ip == nic.ip + && mapping.virtual_mac == nic.mac + && mapping.physical_host_ip == sled_agent.ip + && mapping.vni == vni + }); + + if mapping.is_some() { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }; + wait_for_condition( + condition, + &Duration::from_secs(1), + &Duration::from_secs(30), + ) + .await + .expect("matching v2p 
mapping should be present"); } /// Simulate completion of an ongoing instance state transition. To do this, we diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 8c71d9eeb1..d332a9de01 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -842,26 +842,41 @@ } } }, - "/v2p/{interface_id}": { + "/v2p": { + "get": { + "summary": "List v2p mappings present on sled", + "operationId": "list_v2p", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_VirtualNetworkInterfaceHost", + "type": "array", + "items": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, "put": { "summary": "Create a mapping from a virtual NIC to a physical host", "operationId": "set_v2p", - "parameters": [ - { - "in": "path", - "name": "interface_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SetVirtualNetworkInterfaceHost" + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" } } }, @@ -882,22 +897,11 @@ "delete": { "summary": "Delete a mapping from a virtual NIC to a physical host", "operationId": "del_v2p", - "parameters": [ - { - "in": "path", - "name": "interface_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DeleteVirtualNetworkInterfaceHost" + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" } } }, @@ -2073,29 +2077,6 @@ "target" ] }, - "DeleteVirtualNetworkInterfaceHost": { - "description": "The data needed to identify a virtual IP for which a sled maintains an OPTE virtual-to-physical mapping such that that mapping can be deleted.", - "type": "object", - "properties": { - "virtual_ip": { - "description": "The virtual IP whose mapping should be deleted.", - "type": "string", - "format": "ip" - }, - "vni": { - "description": "The VNI for the network containing the virtual IP whose mapping should be deleted.", - "allOf": [ - { - "$ref": "#/components/schemas/Vni" - } - ] - } - }, - "required": [ - "virtual_ip", - "vni" - ] - }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -4515,32 +4496,6 @@ "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, - "SetVirtualNetworkInterfaceHost": { - "description": "A mapping from a virtual NIC to a physical host", - "type": "object", - "properties": { - "physical_host_ip": { - "type": "string", - "format": "ipv6" - }, - "virtual_ip": { - "type": "string", - "format": "ip" - }, - "virtual_mac": { - "$ref": "#/components/schemas/MacAddr" - }, - "vni": { - "$ref": "#/components/schemas/Vni" - } - }, - "required": [ - "physical_host_ip", - "virtual_ip", - "virtual_mac", - "vni" - ] - }, "SledInstanceState": { "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", "type": "object", @@ -4809,6 +4764,32 @@ 
"version" ] }, + "VirtualNetworkInterfaceHost": { + "description": "A mapping from a virtual NIC to a physical host", + "type": "object", + "properties": { + "physical_host_ip": { + "type": "string", + "format": "ipv6" + }, + "virtual_ip": { + "type": "string", + "format": "ip" + }, + "virtual_mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "physical_host_ip", + "virtual_ip", + "virtual_mac", + "vni" + ] + }, "VmmRuntimeState": { "description": "The dynamic runtime properties of an individual VMM process.", "type": "object", diff --git a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json index 111d7c0ed2..286ac63405 100644 --- a/oximeter/collector/tests/output/self-stat-schema.json +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -39,7 +39,7 @@ } ], "datum_type": "cumulative_u64", - "created": "2024-05-17T01:26:16.797600385Z" + "created": "2024-05-21T18:32:24.199619581Z" }, "oximeter_collector:failed_collections": { "timeseries_name": "oximeter_collector:failed_collections", @@ -86,6 +86,6 @@ } ], "datum_type": "cumulative_u64", - "created": "2024-05-17T01:26:16.798713487Z" + "created": "2024-05-21T18:32:24.200514936Z" } } \ No newline at end of file diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs index 3284c70c1f..f40572d762 100644 --- a/oximeter/db/src/oxql/ast/table_ops/group_by.rs +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -496,7 +496,7 @@ mod tests { ) .unwrap(); ts0.points.start_times = None; - ts0.points.timestamps = timestamps.clone(); + ts0.points.timestamps.clone_from(×tamps); *ts0.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(1.0), if matches!( diff --git a/oximeter/db/src/oxql/ast/table_ops/limit.rs b/oximeter/db/src/oxql/ast/table_ops/limit.rs index 46d19b9cdc..0205868f5c 100644 --- a/oximeter/db/src/oxql/ast/table_ops/limit.rs +++ b/oximeter/db/src/oxql/ast/table_ops/limit.rs @@ -150,7 +150,7 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps = timestamps.clone(); + timeseries.points.timestamps.clone_from(×tamps); timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ Some(1), Some(2), @@ -166,7 +166,7 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps = timestamps.clone(); + timeseries.points.timestamps.clone_from(×tamps); timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ Some(4), Some(5), diff --git a/rust-toolchain.toml b/rust-toolchain.toml index a2ed3895ec..7c513cfbad 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -4,5 +4,5 @@ # # We choose a specific toolchain (rather than "stable") for repeatability. The # intent is to keep this up-to-date with recently-released stable Rust. 
-channel = "1.77.2" +channel = "1.78.0" profile = "default" diff --git a/schema/crdb/add-view-for-v2p-mappings/up01.sql b/schema/crdb/add-view-for-v2p-mappings/up01.sql new file mode 100644 index 0000000000..96d5723c00 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up01.sql @@ -0,0 +1,41 @@ +CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view +AS +WITH VmV2pMappings AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id + JOIN omicron.public.sled s ON vmm.sled_id = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'instance' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +), +ProbeV2pMapping AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.probe p ON n.parent_id = p.id + JOIN omicron.public.sled s ON p.sled = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'probe' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +) +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM VmV2pMappings +UNION +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping; diff --git a/schema/crdb/add-view-for-v2p-mappings/up02.sql b/schema/crdb/add-view-for-v2p-mappings/up02.sql new file mode 100644 index 0000000000..5ab1075fbe --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up02.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS network_interface_by_parent +ON omicron.public.network_interface (parent_id) +STORING (name, kind, vpc_id, subnet_id, mac, ip, slot); diff --git a/schema/crdb/add-view-for-v2p-mappings/up03.sql b/schema/crdb/add-view-for-v2p-mappings/up03.sql new file mode 100644 index 0000000000..86cef026a1 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up03.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS sled_by_policy_and_state +ON omicron.public.sled (sled_policy, sled_state, id) STORING (ip); diff --git a/schema/crdb/add-view-for-v2p-mappings/up04.sql b/schema/crdb/add-view-for-v2p-mappings/up04.sql new file mode 100644 index 0000000000..809146b809 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up04.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS active_vmm +on omicron.public.vmm (time_deleted, sled_id, instance_id); diff --git a/schema/crdb/add-view-for-v2p-mappings/up05.sql b/schema/crdb/add-view-for-v2p-mappings/up05.sql new file mode 100644 index 0000000000..cdabdc6a96 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up05.sql @@ -0,0 +1,4 @@ +CREATE INDEX IF NOT EXISTS v2p_mapping_details +ON omicron.public.network_interface ( + time_deleted, kind, subnet_id, vpc_id, parent_id +) STORING (mac, ip); diff --git a/schema/crdb/add-view-for-v2p-mappings/up06.sql b/schema/crdb/add-view-for-v2p-mappings/up06.sql new file mode 100644 index 0000000000..afd10ed13f --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up06.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS sled_by_policy +ON omicron.public.sled (sled_policy) STORING (ip, sled_state); diff --git a/schema/crdb/add-view-for-v2p-mappings/up07.sql b/schema/crdb/add-view-for-v2p-mappings/up07.sql new file mode 100644 index 0000000000..defe411f96 --- /dev/null +++ 
b/schema/crdb/add-view-for-v2p-mappings/up07.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS vmm_by_instance_id +ON omicron.public.vmm (instance_id) STORING (sled_id); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 9466c04d15..fc70d10f3d 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3808,6 +3808,73 @@ ON omicron.public.switch_port (port_settings_id, port_name) STORING (switch_loca CREATE INDEX IF NOT EXISTS switch_port_name ON omicron.public.switch_port (port_name); +COMMIT; +BEGIN; + +-- view for v2p mapping rpw +CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view +AS +WITH VmV2pMappings AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id + JOIN omicron.public.sled s ON vmm.sled_id = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'instance' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +), +ProbeV2pMapping AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.probe p ON n.parent_id = p.id + JOIN omicron.public.sled s ON p.sled = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'probe' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +) +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM VmV2pMappings +UNION +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping; + +CREATE INDEX IF NOT EXISTS network_interface_by_parent +ON omicron.public.network_interface (parent_id) +STORING (name, kind, vpc_id, subnet_id, mac, ip, slot); + +CREATE INDEX IF NOT EXISTS sled_by_policy_and_state +ON omicron.public.sled (sled_policy, sled_state, id) STORING (ip); + +CREATE INDEX IF NOT EXISTS active_vmm +ON omicron.public.vmm (time_deleted, sled_id, instance_id); + +CREATE INDEX IF NOT EXISTS v2p_mapping_details +ON omicron.public.network_interface ( + time_deleted, kind, subnet_id, vpc_id, parent_id +) STORING (mac, ip); + +CREATE INDEX IF NOT EXISTS sled_by_policy +ON omicron.public.sled (sled_policy) STORING (ip, sled_state); + +CREATE INDEX IF NOT EXISTS vmm_by_instance_id +ON omicron.public.vmm (instance_id) STORING (sled_id); + /* * Metadata for the schema itself. This version number isn't great, as there's * nothing to ensure it gets bumped when it should be, but it's a start. 
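The v2p_mapping_view added above (and mirrored into dbinit.sql) is what feeds the background task its desired state: one row per (NIC, sled) pair for in-service, active sleds, covering both instance and probe NICs, with the supporting indexes keeping the view cheap to poll. Roughly, each row gets turned into the sled-agent wire type while rejecting the unexpected case of an IPv4 sled address. The types below are simplified stand-ins, not the real nexus-db-model or sled-agent-client definitions.

```rust
use std::net::{IpAddr, Ipv6Addr};

// Simplified stand-in for one row of v2p_mapping_view.
struct V2PMappingRow {
    sled_ip: IpAddr,
    vni: u32,
    mac: [u8; 6],
    ip: IpAddr,
}

// Simplified stand-in for the wire type sent to sled agents.
struct WireMapping {
    virtual_ip: IpAddr,
    virtual_mac: [u8; 6],
    physical_host_ip: Ipv6Addr,
    vni: u32,
}

// Mirrors the filter_map in the background task: keep only rows whose sled
// address is IPv6 (it always should be) and convert them to the wire shape.
fn to_wire(row: V2PMappingRow) -> Option<WireMapping> {
    let physical_host_ip = match row.sled_ip {
        IpAddr::V6(v6) => v6,
        IpAddr::V4(v4) => {
            eprintln!("sled ip should be ipv6 but is ipv4: {v4}");
            return None;
        }
    };
    Some(WireMapping {
        virtual_ip: row.ip,
        virtual_mac: row.mac,
        physical_host_ip,
        vni: row.vni,
    })
}

fn main() {
    let row = V2PMappingRow {
        sled_ip: IpAddr::V6(Ipv6Addr::LOCALHOST),
        vni: 100,
        mac: [0xa8, 0x40, 0x25, 0x00, 0x00, 0x01],
        ip: "172.30.0.5".parse().unwrap(),
    };
    assert!(to_wire(row).is_some());
}
```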
diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index 5d9c01e7f2..9baf0e7ef3 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -299,11 +299,3 @@ impl BootstrapAgentHandleReceiver { tx.send(Ok(())).unwrap(); } } - -struct AbortOnDrop(JoinHandle); - -impl Drop for AbortOnDrop { - fn drop(&mut self) { - self.0.abort(); - } -} diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index f8156a617e..ff5ab393ea 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -25,9 +25,7 @@ use dropshot::{ HttpResponseUpdatedNoContent, Path, Query, RequestContext, StreamingBody, TypedBody, }; -use illumos_utils::opte::params::{ - DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, -}; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use installinator_common::M2Slot; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{ @@ -73,6 +71,7 @@ pub fn api() -> SledApiDescription { api.register(zone_bundle_cleanup_context_update)?; api.register(zone_bundle_cleanup)?; api.register(sled_role_get)?; + api.register(list_v2p)?; api.register(set_v2p)?; api.register(del_v2p)?; api.register(timesync_get)?; @@ -656,24 +655,16 @@ async fn vpc_firewall_rules_put( Ok(HttpResponseUpdatedNoContent()) } -/// Path parameters for V2P mapping related requests (sled agent API) -#[allow(dead_code)] -#[derive(Deserialize, JsonSchema)] -struct V2pPathParam { - interface_id: Uuid, -} - /// Create a mapping from a virtual NIC to a physical host // Keep interface_id to maintain parity with the simulated sled agent, which // requires interface_id on the path. #[endpoint { method = PUT, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn set_v2p( rqctx: RequestContext, - _path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); @@ -688,12 +679,11 @@ async fn set_v2p( // requires interface_id on the path. 
#[endpoint { method = DELETE, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn del_v2p( rqctx: RequestContext, - _path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); @@ -703,6 +693,22 @@ async fn del_v2p( Ok(HttpResponseUpdatedNoContent()) } +/// List v2p mappings present on sled +// Used by nexus background task +#[endpoint { + method = GET, + path = "/v2p/", +}] +async fn list_v2p( + rqctx: RequestContext, +) -> Result>, HttpError> { + let sa = rqctx.context(); + + let vnics = sa.list_virtual_nics().await.map_err(Error::from)?; + + Ok(HttpResponseOk(vnics)) +} + #[endpoint { method = GET, path = "/timesync", diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 6cddac6fb8..ae1318a8b1 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -20,8 +20,7 @@ use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; use dropshot::RequestContext; use dropshot::TypedBody; -use illumos_utils::opte::params::DeleteVirtualNetworkInterfaceHost; -use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; @@ -54,6 +53,7 @@ pub fn api() -> SledApiDescription { api.register(vpc_firewall_rules_put)?; api.register(set_v2p)?; api.register(del_v2p)?; + api.register(list_v2p)?; api.register(uplink_ensure)?; api.register(read_network_bootstore_config)?; api.register(write_network_bootstore_config)?; @@ -343,27 +343,19 @@ async fn vpc_firewall_rules_put( Ok(HttpResponseUpdatedNoContent()) } -/// Path parameters for V2P mapping related requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct V2pPathParam { - interface_id: Uuid, -} - /// Create a mapping from a virtual NIC to a physical host #[endpoint { method = PUT, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn set_v2p( rqctx: RequestContext>, - path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); - let interface_id = path_params.into_inner().interface_id; let body_args = body.into_inner(); - sa.set_virtual_nic_host(interface_id, &body_args) + sa.set_virtual_nic_host(&body_args) .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; @@ -373,24 +365,37 @@ async fn set_v2p( /// Delete a mapping from a virtual NIC to a physical host #[endpoint { method = DELETE, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn del_v2p( rqctx: RequestContext>, - path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); - let interface_id = path_params.into_inner().interface_id; let body_args = body.into_inner(); - sa.unset_virtual_nic_host(interface_id, &body_args) + sa.unset_virtual_nic_host(&body_args) .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseUpdatedNoContent()) } +/// List v2p mappings present on sled +#[endpoint { + method = GET, + path = "/v2p/", +}] +async fn list_v2p( + rqctx: RequestContext>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + + let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; + + Ok(HttpResponseOk(vnics)) +} + #[endpoint { method = POST, path = "/switch-ports", diff --git a/sled-agent/src/sim/sled_agent.rs 
b/sled-agent/src/sim/sled_agent.rs index 298a8adc34..d9308bf769 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -26,9 +26,7 @@ use anyhow::bail; use anyhow::Context; use dropshot::{HttpError, HttpServer}; use futures::lock::Mutex; -use illumos_utils::opte::params::{ - DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, -}; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use ipnetwork::Ipv6Network; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -74,7 +72,7 @@ pub struct SledAgent { nexus_address: SocketAddr, pub nexus_client: Arc, disk_id_to_region_ids: Mutex>>, - pub v2p_mappings: Mutex>>, + pub v2p_mappings: Mutex>, mock_propolis: Mutex>, PropolisClient)>>, /// lists of external IPs assigned to instances @@ -189,7 +187,7 @@ impl SledAgent { nexus_address, nexus_client, disk_id_to_region_ids: Mutex::new(HashMap::new()), - v2p_mappings: Mutex::new(HashMap::new()), + v2p_mappings: Mutex::new(HashSet::new()), external_ips: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), config: config.clone(), @@ -672,36 +670,29 @@ impl SledAgent { pub async fn set_virtual_nic_host( &self, - interface_id: Uuid, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { let mut v2p_mappings = self.v2p_mappings.lock().await; - let vec = v2p_mappings.entry(interface_id).or_default(); - vec.push(mapping.clone()); + v2p_mappings.insert(mapping.clone()); Ok(()) } pub async fn unset_virtual_nic_host( &self, - interface_id: Uuid, - mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { let mut v2p_mappings = self.v2p_mappings.lock().await; - let vec = v2p_mappings.entry(interface_id).or_default(); - vec.retain(|x| { - x.virtual_ip != mapping.virtual_ip || x.vni != mapping.vni - }); - - // If the last entry was removed, remove the entire interface ID so that - // tests don't have to distinguish never-created entries from - // previously-extant-but-now-empty entries. 
- if vec.is_empty() { - v2p_mappings.remove(&interface_id); - } - + v2p_mappings.remove(mapping); Ok(()) } + pub async fn list_virtual_nics( + &self, + ) -> Result, Error> { + let v2p_mappings = self.v2p_mappings.lock().await; + Ok(Vec::from_iter(v2p_mappings.clone())) + } + pub async fn instance_put_external_ip( &self, instance_id: Uuid, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 6f73d88fe6..ee33733718 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -37,9 +37,7 @@ use derive_more::From; use dropshot::HttpError; use futures::stream::FuturesUnordered; use futures::StreamExt; -use illumos_utils::opte::params::{ - DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, -}; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use illumos_utils::zone::ZONE_PREFIX; @@ -1051,9 +1049,15 @@ impl SledAgent { .map_err(Error::from) } + pub async fn list_virtual_nics( + &self, + ) -> Result, Error> { + self.inner.port_manager.list_virtual_nics().map_err(Error::from) + } + pub async fn set_virtual_nic_host( &self, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { self.inner .port_manager @@ -1063,7 +1067,7 @@ impl SledAgent { pub async fn unset_virtual_nic_host( &self, - mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { self.inner .port_manager diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 0bf2fa6e53..e9a47de29e 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -263,7 +263,7 @@ impl HardwareView { updates.push(DiskAdded(disk.clone())); } - self.disks = polled_hw.disks.clone(); + self.disks.clone_from(&polled_hw.disks); } } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index a2e75249b3..b44c8e5b53 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -347,7 +347,7 @@ impl StorageResources { // This leaves the presence of the disk still in "Self", but // downgrades the disk to an unmanaged status. 
ManagedDisk::ExplicitlyManaged(disk) => { - if self.control_plane_disks.get(identity).is_none() { + if !self.control_plane_disks.contains_key(identity) { *managed_disk = ManagedDisk::Unmanaged(RawDisk::from(disk.clone())); updated = true; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 696411966b..0ed7a0562b 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -56,6 +56,7 @@ sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 [default_region_allocation_strategy] diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 206f716fa7..c57d2d3ba2 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -56,6 +56,7 @@ sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 [default_region_allocation_strategy] diff --git a/tools/console_version b/tools/console_version index 182ef5e818..07cf4cc912 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="078d17117a3937d571bb5535f9791db65be7afc5" -SHA2="f3bc51a9ddf5356ecda85ff11eec032da880be162358bb7ee676ab823e59476c" +COMMIT="a228b75ba35952b68c0b8b0892c452d4fc29467a" +SHA2="8d5b06680e5986b633b3f97e46d7823ea2dddf2b98930d8c6a4f7dc1eb382048" diff --git a/tools/permslip_staging b/tools/permslip_staging index f5ed8e873a..20a362ade0 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,4 +1,4 @@ 03df89d44ad8b653abbeb7fbb83821869f008733e9da946457e72a13cb11d6cc manifest-gimlet-v1.0.19.toml b973cc9feb20f7bba447e7f5291c4070387fa9992deab81301f67f0a3844cd0c manifest-oxide-rot-1-v1.0.11.toml aae829e02d79ec0fe19019c783b6426c6fcc1fe4427aea70b65afc2884f53db8 manifest-psc-v1.0.17.toml -9bd043382ad5c7cdb8f00a66e401a6c4b88e8d588915f304d2c261ea7df4d1b5 manifest-sidecar-v1.0.16.toml +ae00003c288ec4f520167c68de4999e1dfa15b63afe2f89e5ed1cfb8d707ebb9 manifest-sidecar-v1.0.19.toml diff --git a/wicket/src/ui/defaults/dimensions.rs b/wicket/src/ui/defaults/dimensions.rs index ca76807786..2961400aa7 100644 --- a/wicket/src/ui/defaults/dimensions.rs +++ b/wicket/src/ui/defaults/dimensions.rs @@ -18,14 +18,6 @@ pub trait RectExt { /// /// Panics if `height > self.height`. fn center_vertically(self, height: u16) -> Self; - - /// Create a new maximally sized `Rect` that is bounded by `self`, and - /// shifted down by `y` columns. In order to maintain the bounding, the - /// new `Rect` is originally sized to `self` and then shrunk by the same - /// amount it is shifted downwards: namely `y` columns. - /// - /// Panics if `y > self.height`. 
- fn move_down_within_bounds(self, y: u16) -> Self; } impl RectExt for Rect { @@ -42,10 +34,4 @@ impl RectExt for Rect { self.height = height; self } - - fn move_down_within_bounds(mut self, y: u16) -> Self { - self.y = self.y + y; - self.height -= y; - self - } } diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index 42853a4076..10253bc2f7 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -269,7 +269,7 @@ impl UpdateTracker { // This used to check that the task was finished, but we changed // that in favor of forcing users to clear update state before // starting a new one. - update_data.sp_update_data.get(sp).is_some() + update_data.sp_update_data.contains_key(sp) }) .copied() .collect();
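Returning to the integration-test changes earlier in the diff: because v2p mappings are now propagated asynchronously by a background task rather than synchronously by sagas, the tests poll for the expected state with wait_for_condition instead of asserting immediately. The real helper in omicron_test_utils is async; the sketch below is only a synchronous illustration of the same poll-until-timeout shape.

```rust
use std::time::{Duration, Instant};

// Poll `check` every `poll` until it returns true or `max` elapses.
fn wait_for<F: FnMut() -> bool>(
    mut check: F,
    poll: Duration,
    max: Duration,
) -> Result<(), &'static str> {
    let start = Instant::now();
    loop {
        if check() {
            return Ok(());
        }
        if start.elapsed() >= max {
            return Err("condition not met before timeout");
        }
        std::thread::sleep(poll);
    }
}

fn main() {
    // Stand-in for "the sled agent eventually reports the expected mapping".
    let mut attempts = 0;
    wait_for(
        || {
            attempts += 1;
            attempts >= 3
        },
        Duration::from_millis(10),
        Duration::from_secs(30),
    )
    .expect("matching v2p mapping should be present");
}
```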