From 4c85d823a2a13d35b7bb72b78c131293d35de017 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Thu, 1 Aug 2024 21:17:09 +0000 Subject: [PATCH] Track more kinds of datalinks on a sled - Drop the old `physical_data_link:*` timeseries, in favor of an expanded `sled_data_link:*`. This includes the sled identifiers, and also the _kind_ of link, which incorporates physical, VNIC, and OPTE devices. Expunge the old timeseries. - Make the existing metrics manager into a small wrapper around a background task. Add message types for asking the task to start / stop tracking various things, for now just VNICs and OPTE ports. Physical links can also be tracked (but not untracked), which the sled agent does immediately after creating the task. - Add the metrics request queue to the instance manager, instance, and instance runner, and have the runner start / stop tracking the control VNIC and OPTE ports after booting the zone and before stopping it respectively. - Add the metrics request queue to the probe manager, and also start / stop tracking the links in the zones. - Add the metrics queue to the service manager. This one is more complicated, because this object has to exist before the `SledAgent` object itself, in order to start the switch zone. Instead, the manager is provided the queue when it's notified that the `SledAgent` exists, and at the same time tries to use the queue to notify the metrics task about the control VNIC that must have already been plumbed into the zone. The service manager also tracks / untracks the VNICs and OPTE ports for the _Omicron_ zones it starts, which is much simpler. - Add some helper methods into the `{Running,Installed}Zone}` types for listing the names of the control VNIC, bootstrap VNIC, and any OPTE port names. These are used to tell the metrics task what links to track. - Clean up a few straggling comments or references to the VNICs that were previously required between OPTE ports and the guest Viona driver. Those were removed in #5989. --- illumos-utils/src/dladm.rs | 8 +- illumos-utils/src/link.rs | 18 +- illumos-utils/src/opte/port_manager.rs | 40 +- illumos-utils/src/running_zone.rs | 23 + .../replicated/8/timeseries-to-delete.txt | 6 + .../single-node/8/timeseries-to-delete.txt | 6 + oximeter/db/src/model.rs | 2 +- oximeter/instruments/src/kstat/link.rs | 99 ++-- ...cal-data-link.toml => sled-data-link.toml} | 36 +- sled-agent/src/instance.rs | 38 +- sled-agent/src/instance_manager.rs | 6 + sled-agent/src/metrics.rs | 453 ++++++++++++------ sled-agent/src/probe_manager.rs | 25 + sled-agent/src/services.rs | 281 +++++++++-- sled-agent/src/sled_agent.rs | 71 +-- 15 files changed, 783 insertions(+), 329 deletions(-) create mode 100644 oximeter/db/schema/replicated/8/timeseries-to-delete.txt create mode 100644 oximeter/db/schema/single-node/8/timeseries-to-delete.txt rename oximeter/oximeter/schema/{physical-data-link.toml => sled-data-link.toml} (67%) diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index 1e8a7e70bf4..b93353eb189 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -17,11 +17,6 @@ pub const VNIC_PREFIX: &str = "ox"; pub const VNIC_PREFIX_CONTROL: &str = "oxControl"; pub const VNIC_PREFIX_BOOTSTRAP: &str = "oxBootstrap"; -/// Prefix used to name VNICs over xde devices / OPTE ports. -// TODO-correctness: Remove this when `xde` devices can be directly used beneath -// Viona, and thus plumbed directly to guests. -pub const VNIC_PREFIX_GUEST: &str = "vopte"; - /// Path to the DLADM command. pub const DLADM: &str = "/usr/sbin/dladm"; @@ -42,6 +37,9 @@ pub const BOOTSTRAP_ETHERSTUB_VNIC_NAME: &str = "bootstrap0"; /// The prefix for Chelsio link names. pub const CHELSIO_LINK_PREFIX: &str = "cxgbe"; +/// The prefix for OPTE link names +pub const OPTE_LINK_PREFIX: &str = "opte"; + /// Errors returned from [`Dladm::find_physical`]. #[derive(thiserror::Error, Debug)] pub enum FindPhysicalLinkError { diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs index 871ba55e753..5d0c80b0e1f 100644 --- a/illumos-utils/src/link.rs +++ b/illumos-utils/src/link.rs @@ -7,7 +7,7 @@ use crate::destructor::{Deletable, Destructor}; use crate::dladm::{ CreateVnicError, DeleteVnicError, VnicSource, VNIC_PREFIX, - VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, + VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL, }; use omicron_common::api::external::MacAddr; use std::sync::{ @@ -125,7 +125,6 @@ impl VnicAllocator
{ pub enum LinkKind { Physical, OxideControlVnic, - GuestVnic, OxideBootstrapVnic, } @@ -135,14 +134,20 @@ impl LinkKind { pub fn from_name(name: &str) -> Option { if name.starts_with(VNIC_PREFIX) { Some(LinkKind::OxideControlVnic) - } else if name.starts_with(VNIC_PREFIX_GUEST) { - Some(LinkKind::GuestVnic) } else if name.starts_with(VNIC_PREFIX_BOOTSTRAP) { Some(LinkKind::OxideBootstrapVnic) } else { None } } + + /// Return `true` if this link is a VNIC. + pub const fn is_vnic(&self) -> bool { + match self { + LinkKind::Physical => false, + LinkKind::OxideControlVnic | LinkKind::OxideBootstrapVnic => true, + } + } } #[derive(thiserror::Error, Debug)] @@ -203,6 +208,11 @@ impl Link { pub fn kind(&self) -> LinkKind { self.kind } + + /// Return true if this is a VNIC. + pub fn is_vnic(&self) -> bool { + self.kind.is_vnic() + } } impl Drop for Link { diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index b6d28d1b06a..93c646cfab4 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -4,6 +4,7 @@ //! Manager for all OPTE ports on a Helios system +use crate::dladm::OPTE_LINK_PREFIX; use crate::opte::opte_firewall_rules; use crate::opte::params::VirtualNetworkInterfaceHost; use crate::opte::params::VpcFirewallRule; @@ -52,9 +53,6 @@ use std::sync::Arc; use std::sync::Mutex; use uuid::Uuid; -// Prefix used to identify xde data links. -const XDE_LINK_PREFIX: &str = "opte"; - /// Stored routes (and usage count) for a given VPC/subnet. #[derive(Debug, Clone)] struct RouteSet { @@ -85,7 +83,7 @@ impl PortManagerInner { fn next_port_name(&self) -> String { format!( "{}{}", - XDE_LINK_PREFIX, + OPTE_LINK_PREFIX, self.next_port_id.fetch_add(1, Ordering::SeqCst) ) } @@ -265,8 +263,9 @@ impl PortManager { // So we: // // - create the xde device - // - create the vnic, cleaning up the xde device if that fails - // - add both to the Port + // - create the port ticket + // - create the port + // - add both to the PortManager's map // // The Port object's drop implementation will clean up both of those, if // any of the remaining fallible operations fail. @@ -289,21 +288,6 @@ impl PortManager { )?; hdl }; - - // Initialize firewall rules for the new port. - let rules = opte_firewall_rules(firewall_rules, &vni, &mac); - debug!( - self.inner.log, - "Setting firewall rules"; - "port_name" => &port_name, - "rules" => ?&rules, - ); - #[cfg(target_os = "illumos")] - hdl.set_fw_rules(&oxide_vpc::api::SetFwRulesReq { - port_name: port_name.clone(), - rules, - })?; - let (port, ticket) = { let mut ports = self.inner.ports.lock().unwrap(); let ticket = PortTicket::new(nic.id, nic.kind, self.inner.clone()); @@ -326,6 +310,20 @@ impl PortManager { (port, ticket) }; + // Initialize firewall rules for the new port. + let rules = opte_firewall_rules(firewall_rules, &vni, &mac); + debug!( + self.inner.log, + "Setting firewall rules"; + "port_name" => &port_name, + "rules" => ?&rules, + ); + #[cfg(target_os = "illumos")] + hdl.set_fw_rules(&oxide_vpc::api::SetFwRulesReq { + port_name: port_name.clone(), + rules, + })?; + // Check locally to see whether we have any routes from the // control plane for this port already installed. If not, // create a record to show that we're interested in receiving diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 605809f0192..5dbe4338cf2 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -362,6 +362,22 @@ impl RunningZone { self.inner.zonepath.pool.as_ref() } + /// Return the name of a bootstrap VNIC in the zone, if any. + pub fn bootstrap_vnic_name(&self) -> Option<&str> { + self.inner.get_bootstrap_vnic_name() + } + + /// Return the name of the control VNIC. + pub fn control_vnic_name(&self) -> &str { + self.inner.get_control_vnic_name() + } + + /// Return the names of any OPTE ports in the zone. + pub fn opte_port_names(&self) -> impl Iterator { + self.inner.opte_ports().map(|port| port.name()) + } + + /// Return the control IP address. pub fn control_interface(&self) -> AddrObject { AddrObject::new( self.inner.get_control_vnic_name(), @@ -939,10 +955,17 @@ impl InstalledZone { zone_name } + /// Get the name of the bootstrap VNIC in the zone, if any. + pub fn get_bootstrap_vnic_name(&self) -> Option<&str> { + self.bootstrap_vnic.as_ref().map(|link| link.name()) + } + + /// Get the name of the control VNIC in the zone. pub fn get_control_vnic_name(&self) -> &str { self.control_vnic.name() } + /// Return the name of the zone itself. pub fn name(&self) -> &str { &self.name } diff --git a/oximeter/db/schema/replicated/8/timeseries-to-delete.txt b/oximeter/db/schema/replicated/8/timeseries-to-delete.txt new file mode 100644 index 00000000000..969e18f72d6 --- /dev/null +++ b/oximeter/db/schema/replicated/8/timeseries-to-delete.txt @@ -0,0 +1,6 @@ +physical_data_link:bytes_received +physical_data_link:bytes_sent +physical_data_link:errors_received +physical_data_link:errors_sent +physical_data_link:packets_received +physical_data_link:packets_sent diff --git a/oximeter/db/schema/single-node/8/timeseries-to-delete.txt b/oximeter/db/schema/single-node/8/timeseries-to-delete.txt new file mode 100644 index 00000000000..969e18f72d6 --- /dev/null +++ b/oximeter/db/schema/single-node/8/timeseries-to-delete.txt @@ -0,0 +1,6 @@ +physical_data_link:bytes_received +physical_data_link:bytes_sent +physical_data_link:errors_received +physical_data_link:errors_sent +physical_data_link:packets_received +physical_data_link:packets_sent diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 878017797af..05667058b53 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 7; +pub const OXIMETER_VERSION: u64 = 8; // Wrapper type to represent a boolean in the database. // diff --git a/oximeter/instruments/src/kstat/link.rs b/oximeter/instruments/src/kstat/link.rs index 05075940565..2688b535139 100644 --- a/oximeter/instruments/src/kstat/link.rs +++ b/oximeter/instruments/src/kstat/link.rs @@ -17,7 +17,7 @@ use kstat_rs::Named; use oximeter::types::Cumulative; use oximeter::Sample; -oximeter::use_timeseries!("physical-data-link.toml"); +oximeter::use_timeseries!("sled-data-link.toml"); // Helper function to extract the same kstat metrics from all link targets. fn extract_link_kstats( @@ -32,7 +32,7 @@ where let Named { name, value } = named_data; if *name == "rbytes64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::BytesReceived { + let metric = sled_data_link::BytesReceived { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -40,7 +40,7 @@ where })) } else if *name == "obytes64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::BytesSent { + let metric = sled_data_link::BytesSent { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -48,7 +48,7 @@ where })) } else if *name == "ipackets64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::PacketsReceived { + let metric = sled_data_link::PacketsReceived { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -56,7 +56,7 @@ where })) } else if *name == "opackets64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::PacketsSent { + let metric = sled_data_link::PacketsSent { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -64,7 +64,7 @@ where })) } else if *name == "ierrors" { Some(value.as_u32().and_then(|x| { - let metric = physical_data_link::ErrorsReceived { + let metric = sled_data_link::ErrorsReceived { datum: Cumulative::with_start_time(creation_time, x.into()), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -72,7 +72,7 @@ where })) } else if *name == "oerrors" { Some(value.as_u32().and_then(|x| { - let metric = physical_data_link::ErrorsSent { + let metric = sled_data_link::ErrorsSent { datum: Cumulative::with_start_time(creation_time, x.into()), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -88,7 +88,7 @@ trait LinkKstatTarget: KstatTarget { fn link_name(&self) -> &str; } -impl LinkKstatTarget for physical_data_link::PhysicalDataLink { +impl LinkKstatTarget for sled_data_link::SledDataLink { fn link_name(&self) -> &str { &self.link_name } @@ -155,6 +155,10 @@ mod tests { const RACK_ID: Uuid = uuid!("de784702-cafb-41a9-b3e5-93af189def29"); const SLED_ID: Uuid = uuid!("88240343-5262-45f4-86f1-3c82fe383f2a"); + const SLED_MODEL: &str = "fake-gimlet"; + const SLED_REVISION: u32 = 1; + const SLED_SERIAL: &str = "fake-serial"; + const KIND: &str = "etherstub"; // An etherstub we can use for testing. // @@ -221,15 +225,16 @@ mod tests { } #[test] - fn test_physical_datalink() { + fn test_sled_datalink() { let link = TestEtherstub::new(); - let sn = String::from("BRM000001"); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), + sled_serial: SLED_SERIAL.into(), link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, }; let ctl = Ctl::new().unwrap(); let ctl = ctl.update().unwrap(); @@ -246,14 +251,15 @@ mod tests { #[tokio::test] async fn test_kstat_sampler() { let mut sampler = KstatSampler::new(&test_logger()).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), + sled_serial: SLED_SERIAL.into(), link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, }; let details = CollectionDetails::never(Duration::from_secs(1)); let id = sampler.add_target(dl, details).await.unwrap(); @@ -294,14 +300,15 @@ mod tests { let limit = 2; let mut sampler = KstatSampler::with_sample_limit(&test_logger(), limit).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, }; let details = CollectionDetails::never(Duration::from_secs(1)); sampler.add_target(dl, details).await.unwrap(); @@ -328,7 +335,7 @@ mod tests { let samples: Vec<_> = sampler.produce().unwrap().collect(); let (link_samples, dropped_samples): (Vec<_>, Vec<_>) = samples .iter() - .partition(|s| s.timeseries_name.contains("physical_data_link")); + .partition(|s| s.timeseries_name.contains("sled_data_link")); println!("{link_samples:#?}"); assert_eq!(link_samples.len(), limit); @@ -360,15 +367,16 @@ mod tests { // make sure we expire after the expected period. let log = test_logger(); let mut sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); @@ -417,15 +425,16 @@ mod tests { async fn test_kstat_start_time_is_equal() { let log = test_logger(); let mut sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); @@ -444,7 +453,7 @@ mod tests { let samples = sampler.produce().unwrap(); let mut start_times = samples .filter(|sample| { - sample.timeseries_name.as_str().starts_with("physical") + sample.timeseries_name.as_str().starts_with("sled") }) .map(|sample| sample.measurement.start_time().unwrap()); let first = start_times.next().unwrap(); @@ -461,7 +470,6 @@ mod tests { // make sure the creation times are pruned. let log = test_logger(); let sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); let path = KstatPath { module: "link".to_string(), @@ -469,12 +477,14 @@ mod tests { name: link.name.clone(), }; info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); @@ -508,7 +518,6 @@ mod tests { // make sure the creation times are pruned. let log = test_logger(); let sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); let path = KstatPath { module: "link".to_string(), @@ -516,12 +525,14 @@ mod tests { name: link.name.clone(), }; info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); diff --git a/oximeter/oximeter/schema/physical-data-link.toml b/oximeter/oximeter/schema/sled-data-link.toml similarity index 67% rename from oximeter/oximeter/schema/physical-data-link.toml rename to oximeter/oximeter/schema/sled-data-link.toml index d526aa6af19..f48f44d023c 100644 --- a/oximeter/oximeter/schema/physical-data-link.toml +++ b/oximeter/oximeter/schema/sled-data-link.toml @@ -1,45 +1,41 @@ format_version = 1 [target] -name = "physical_data_link" -description = "A physical network link on a compute sled" +name = "sled_data_link" +description = "A network data link on a compute sled" authz_scope = "fleet" versions = [ - { version = 1, fields = [ "rack_id", "sled_id", "hostname", "serial", "link_name" ] }, - # This is the intended next version, but actual schema updates are not yet - # supported. This is left here as an example and breadcrumb to implement - # that update in the future. - #{ version = 2, fields = [ "rack_id", "sled_id", "serial", "model", "revision", "link_name" ] }, + { version = 1, fields = [ "kind", "link_name", "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial" ] }, ] +[fields.kind] +type = "string" +description = "The kind or class of the data link" + +[fields.link_name] +type = "string" +description = "Name of the data link" + [fields.rack_id] type = "uuid" -description = "UUID for the link's sled" +description = "ID for the link's rack" [fields.sled_id] type = "uuid" -description = "UUID for the link's sled" +description = "ID for the link's sled" -[fields.hostname] -type = "string" -description = "Hostname of the link's sled" - -[fields.model] +[fields.sled_model] type = "string" description = "Model number of the link's sled" -[fields.revision] +[fields.sled_revision] type = "u32" description = "Revision number of the sled" -[fields.serial] +[fields.sled_serial] type = "string" description = "Serial number of the sled" -[fields.link_name] -type = "string" -description = "Name of the physical data link" - [[metrics]] name = "bytes_sent" description = "Number of bytes sent on the link" diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index b56f5d9df09..0429741578d 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -11,6 +11,7 @@ use crate::common::instance::{ use crate::instance_manager::{ Error as ManagerError, InstanceManagerServices, InstanceTicket, }; +use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClientWithResolver; use crate::params::ZoneBundleMetadata; use crate::params::{InstanceExternalIpBody, ZoneBundleCause}; @@ -364,6 +365,11 @@ struct InstanceRunner { // Object used to collect zone bundles from this instance when terminated. zone_bundler: ZoneBundler, + // TODO(ben) insert metrics handle + // - use in `setup_propolis_inner()` when creating zone + // - use in `terminate_inner()` when destroying + metrics_queue: MetricsRequestQueue, + // Object representing membership in the "instance manager". instance_ticket: InstanceTicket, } @@ -806,6 +812,15 @@ impl InstanceRunner { return; }; + // Ask the sled-agent's metrics task to stop tracking statistics for our + // control VNIC and any OPTE ports in the zone as well. + self.metrics_queue + .untrack_vnic(running_state.running_zone.control_vnic_name()) + .await; + for port in running_state.running_zone.opte_port_names() { + self.metrics_queue.untrack_opte_port(port).await; + } + // Take a zone bundle whenever this instance stops. if let Err(e) = self .zone_bundler @@ -1008,6 +1023,7 @@ impl Instance { storage, zone_bundler, zone_builder_factory, + metrics_queue, } = services; let mut dhcp_config = DhcpCfg { @@ -1097,6 +1113,7 @@ impl Instance { storage, zone_builder_factory, zone_bundler, + metrics_queue, instance_ticket: ticket, }; @@ -1395,8 +1412,11 @@ impl InstanceRunner { } async fn setup_propolis_inner(&mut self) -> Result { - // Create OPTE ports for the instance + // Create OPTE ports for the instance. We also store the names of all + // those ports to notify the metrics task to start collecting statistics + // for them. let mut opte_ports = Vec::with_capacity(self.requested_nics.len()); + let mut opte_port_names = Vec::with_capacity(self.requested_nics.len()); for nic in self.requested_nics.iter() { let (snat, ephemeral_ip, floating_ips) = if nic.primary { ( @@ -1416,6 +1436,7 @@ impl InstanceRunner { dhcp_config: self.dhcp_config.clone(), is_service: false, })?; + opte_port_names.push(port.0.name().to_string()); opte_ports.push(port); } @@ -1486,6 +1507,12 @@ impl InstanceRunner { ); profile.add_to_zone(&self.log, &installed_zone).await?; + // Collect the name of the control VNIC in the zone. + // + // We'll use this to notify the metrics task to start polling its + // statistics. + let vnic_name = installed_zone.get_control_vnic_name().to_string(); + let running_zone = RunningZone::boot(installed_zone).await?; info!(self.log, "Started propolis in zone: {}", zname); @@ -1498,6 +1525,12 @@ impl InstanceRunner { .map_err(|_| Error::Timeout(fmri.to_string()))?; info!(self.log, "Propolis SMF service is online"); + // Notify the metrics task of the OPTE ports and VNIC. + self.metrics_queue.track_vnic(vnic_name).await; + for port in opte_port_names.into_iter() { + self.metrics_queue.track_opte_port(port).await; + } + // We use a custom client builder here because the default progenitor // one has a timeout of 15s but we want to be able to wait indefinitely. let reqwest_client = reqwest::ClientBuilder::new().build().unwrap(); @@ -1895,6 +1928,7 @@ mod tests { storage: storage_handle, zone_bundler, zone_builder_factory: ZoneBuilderFactory::fake(Some(temp_dir)), + metrics_queue: MetricsRequestQueue::for_test().0, } } @@ -2168,6 +2202,7 @@ mod tests { storage, zone_bundler, zone_builder_factory, + metrics_queue, } = fake_instance_manager_services( &log, storage_handle, @@ -2188,6 +2223,7 @@ mod tests { zone_bundler, zone_builder_factory, vmm_reservoir_manager, + metrics_queue, ) .unwrap(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index baa377a0645..bb9303f5e28 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -6,6 +6,7 @@ use crate::instance::propolis_zone_name; use crate::instance::Instance; +use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClientWithResolver; use crate::params::InstanceExternalIpBody; use crate::params::InstanceMetadata; @@ -79,6 +80,7 @@ pub(crate) struct InstanceManagerServices { pub storage: StorageHandle, pub zone_bundler: ZoneBundler, pub zone_builder_factory: ZoneBuilderFactory, + pub metrics_queue: MetricsRequestQueue, } // Describes the internals of the "InstanceManager", though most of the @@ -108,6 +110,7 @@ impl InstanceManager { zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, vmm_reservoir_manager: VmmReservoirManagerHandle, + metrics_queue: MetricsRequestQueue, ) -> Result { let (tx, rx) = mpsc::channel(QUEUE_SIZE); let (terminate_tx, terminate_rx) = mpsc::unbounded_channel(); @@ -126,6 +129,7 @@ impl InstanceManager { storage, zone_bundler, zone_builder_factory, + metrics_queue, }; let runner_handle = @@ -452,6 +456,7 @@ struct InstanceManagerRunner { storage: StorageHandle, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, + metrics_queue: MetricsRequestQueue, } impl InstanceManagerRunner { @@ -637,6 +642,7 @@ impl InstanceManagerRunner { storage: self.storage.clone(), zone_bundler: self.zone_bundler.clone(), zone_builder_factory: self.zone_builder_factory.clone(), + metrics_queue: self.metrics_queue.clone(), }; let state = crate::instance::InstanceInitialState { diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs index fcb260e93a1..f8887bc137b 100644 --- a/sled-agent/src/metrics.rs +++ b/sled-agent/src/metrics.rs @@ -6,122 +6,345 @@ use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::ProducerKind; -use oximeter::types::MetricsError; -use oximeter::types::ProducerRegistry; -use oximeter_instruments::kstat::link; +use omicron_common::api::internal::shared::SledIdentifiers; +use oximeter_instruments::kstat::link::sled_data_link::SledDataLink; use oximeter_instruments::kstat::CollectionDetails; use oximeter_instruments::kstat::Error as KstatError; use oximeter_instruments::kstat::KstatSampler; use oximeter_instruments::kstat::TargetId; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; -use sled_hardware_types::Baseboard; use slog::Logger; +use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::net::Ipv6Addr; use std::net::SocketAddr; -use std::sync::Arc; -use std::sync::Mutex; use std::time::Duration; +use tokio::sync::mpsc; use uuid::Uuid; /// The interval on which we ask `oximeter` to poll us for metric data. -pub(crate) const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(30); +const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(30); /// The interval on which we sample link metrics. -pub(crate) const LINK_SAMPLE_INTERVAL: Duration = Duration::from_secs(10); +// +// TODO(https://github.com/oxidecomputer/omicron/issues/5695) +// These should probably be sampled much densely. We may want to wait for +// https://github.com/oxidecomputer/omicron/issues/740, which would handle +// pagination between the producer and collector, as sampling at < 1s for many +// links could lead to quite large requests. Or we can eat the memory cost for +// now. +const LINK_SAMPLE_INTERVAL: Duration = Duration::from_secs(10); + +/// The interval after which we expire kstat-based collection of transient +/// links. +/// +/// This applies to VNICs and OPTE ports. Physical links are never expired, +/// since we should never expect them to disappear. While we strive to get +/// notifications before these transient links are deleted, it's always possible +/// we miss that, and so the data collection fails. If that fails for more than +/// this interval, we stop attempting to collect its data. +const TRANSIENT_LINK_EXPIRATION_INTERVAL: Duration = Duration::from_secs(60); /// The maximum Dropshot request size for the metrics server. const METRIC_REQUEST_MAX_SIZE: usize = 10 * 1024 * 1024; +/// Size of the queue used to send messages to the metrics task. +const QUEUE_SIZE: usize = 64; + /// An error during sled-agent metric production. #[derive(Debug, thiserror::Error)] pub enum Error { #[error("Kstat-based metric failure")] Kstat(#[source] KstatError), - #[error("Failed to insert metric producer into registry")] - Registry(#[source] MetricsError), + #[error("Failed to start metric producer server")] + ProducerServer(#[source] oximeter_producer::Error), +} - #[error("Failed to fetch hostname")] - Hostname(#[source] std::io::Error), +/// Messages sent to the sled-agent metrics collection task. +/// +/// The sled-agent publish metrics to Oximeter, including statistics about +/// datalinks. This metrics task runs in the background, and code that creates +/// or deletes objects can notify this task to start / stop tracking statistics +/// for them. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(test, allow(dead_code))] +pub(crate) enum Message { + /// Start tracking the named physical link. + /// + /// This is only use on startup, to track the underlays. + TrackPhysical { name: String }, + /// Track the named VNIC. + TrackVnic { name: String }, + /// Stop tracking the named VNIC. + UntrackVnic { name: String }, + /// Track the named OPTE port. + TrackOptePort { name: String }, + /// Stop tracking the named OPTE port. + UntrackOptePort { name: String }, + // TODO-completeness: We will probably want to track other kinds of + // statistics here too. For example, we could send messages when a zone is + // created / destroyed to track zonestats; we might also want to support + // passing an explicit `oximeter::Producer` type in, so that other code + // could attach their own producers. +} - #[error("Non-UTF8 hostname")] - NonUtf8Hostname, +/// Helper to define kinds of tracked links. +struct LinkKind; - #[error("Missing NULL byte in hostname")] - HostnameMissingNull, +impl LinkKind { + const PHYSICAL: &'static str = "physical"; + const VNIC: &'static str = "vnic"; + const OPTE: &'static str = "opte"; +} - #[error("Failed to start metric producer server")] - ProducerServer(#[source] oximeter_producer::Error), +/// The main task used to collect and publish sled-agent metrics. +async fn metrics_task( + sled_identifiers: SledIdentifiers, + kstat_sampler: KstatSampler, + _server: ProducerServer, + log: Logger, + mut rx: mpsc::Receiver, +) { + let mut tracked_links: BTreeMap = BTreeMap::new(); + + // Main polling loop, waiting for messages from other pieces of the code to + // track various statistics. + loop { + let Some(message) = rx.recv().await else { + error!(log, "channel closed, exiting"); + return; + }; + trace!(log, "received message"; "message" => ?message); + match message { + Message::TrackPhysical { name } => { + let link = SledDataLink { + kind: LinkKind::PHYSICAL.into(), + link_name: name.into(), + rack_id: sled_identifiers.rack_id, + sled_id: sled_identifiers.sled_id, + sled_model: sled_identifiers.model.clone().into(), + sled_revision: sled_identifiers.revision, + sled_serial: sled_identifiers.serial.clone().into(), + }; + add_datalink(&log, &mut tracked_links, &kstat_sampler, link) + .await; + } + Message::TrackVnic { name } => { + let link = SledDataLink { + kind: LinkKind::VNIC.into(), + link_name: name.into(), + rack_id: sled_identifiers.rack_id, + sled_id: sled_identifiers.sled_id, + sled_model: sled_identifiers.model.clone().into(), + sled_revision: sled_identifiers.revision, + sled_serial: sled_identifiers.serial.clone().into(), + }; + add_datalink(&log, &mut tracked_links, &kstat_sampler, link) + .await; + } + Message::UntrackVnic { name } => { + remove_datalink(&log, &mut tracked_links, &kstat_sampler, name) + .await + } + Message::TrackOptePort { name } => { + let link = SledDataLink { + kind: LinkKind::OPTE.into(), + link_name: name.into(), + rack_id: sled_identifiers.rack_id, + sled_id: sled_identifiers.sled_id, + sled_model: sled_identifiers.model.clone().into(), + sled_revision: sled_identifiers.revision, + sled_serial: sled_identifiers.serial.clone().into(), + }; + add_datalink(&log, &mut tracked_links, &kstat_sampler, link) + .await; + } + Message::UntrackOptePort { name } => { + remove_datalink(&log, &mut tracked_links, &kstat_sampler, name) + .await + } + } + } } -// Basic metadata about the sled agent used when publishing metrics. -#[derive(Clone, Debug)] -struct SledIdentifiers { - sled_id: Uuid, - rack_id: Uuid, - baseboard: Baseboard, +/// Stop tracking a link by name. +async fn remove_datalink( + log: &Logger, + tracked_links: &mut BTreeMap, + kstat_sampler: &KstatSampler, + name: String, +) { + match tracked_links.remove(&name) { + Some(id) => match kstat_sampler.remove_target(id).await { + Ok(_) => { + debug!( + log, + "Removed VNIC from tracked links"; + "link_name" => name, + ); + } + Err(err) => { + error!( + log, + "Failed to remove VNIC from kstat sampler, \ + metrics may still be produced for it"; + "link_name" => name, + "error" => ?err, + ); + } + }, + None => { + debug!( + log, + "received message to delete VNIC, but \ + it is not in the list of tracked links"; + "link_name" => name, + ); + } + } } -/// Type managing all oximeter metrics produced by the sled-agent. -// -// TODO-completeness: We probably want to get kstats or other metrics in to this -// type from other parts of the code, possibly before the `SledAgent` itself -// exists. This is similar to the storage resources or other objects, most of -// which are essentially an `Arc>`. It would be nice to avoid that -// pattern, but until we have more statistics, it's not clear whether that's -// worth it right now. -#[derive(Clone)] +/// Start tracking a new link of the specified kind. +async fn add_datalink( + log: &Logger, + tracked_links: &mut BTreeMap, + kstat_sampler: &KstatSampler, + link: SledDataLink, +) { + let kind = link.kind.clone(); + match tracked_links.entry(link.link_name.to_string()) { + Entry::Vacant(entry) => { + let details = if is_transient_link(&kind) { + CollectionDetails::duration( + LINK_SAMPLE_INTERVAL, + TRANSIENT_LINK_EXPIRATION_INTERVAL, + ) + } else { + CollectionDetails::never(LINK_SAMPLE_INTERVAL) + }; + match kstat_sampler.add_target(link, details).await { + Ok(id) => { + debug!( + log, + "Added new link to kstat sampler"; + "link_name" => entry.key(), + "link_kind" => ?kind, + ); + entry.insert(id); + } + Err(err) => { + error!( + log, + "Failed to add VNIC to kstat sampler, \ + no metrics will be collected for it"; + "error" => ?err, + ); + } + } + } + Entry::Occupied(entry) => { + debug!( + log, + "received message to track VNIC, \ + but it is already being tracked"; + "link_name" => entry.key(), + ); + } + } +} + +/// Return true if this is considered a transient link, from the perspective of +/// its expiration behavior. +fn is_transient_link(kind: &str) -> bool { + kind == LinkKind::VNIC || kind == LinkKind::OPTE +} + +/// Manages sled-based metrics reported to Oximeter. +/// +/// This object is used to sample kernel statistics and produce other Oximeter +/// metrics for the sled agent. It runs a small background task responsible for +/// actually generating / reporting samples. Users operate with it through the +/// `MetricsHandle`. +#[derive(Debug)] pub struct MetricsManager { - metadata: Arc, - _log: Logger, - kstat_sampler: KstatSampler, - // TODO-scalability: We may want to generalize this to store any kind of - // tracked target, and use a naming scheme that allows us pick out which - // target we're interested in from the arguments. - // - // For example, we can use the link name to do this, for any physical or - // virtual link, because they need to be unique. We could also do the same - // for disks or memory. If we wanted to guarantee uniqueness, we could - // namespace them internally, e.g., `"datalink:{link_name}"` would be the - // real key. - tracked_links: Arc>>, - producer_server: Arc, + /// Receive-side of a channel used to pass the background task messages. + #[cfg_attr(test, allow(dead_code))] + tx: mpsc::Sender, + /// The background task itself. + _task: tokio::task::JoinHandle<()>, } impl MetricsManager { /// Construct a new metrics manager. - /// - /// This takes a few key pieces of identifying information that are used - /// when reporting sled-specific metrics. pub fn new( - sled_id: Uuid, - rack_id: Uuid, - baseboard: Baseboard, - sled_address: Ipv6Addr, - log: Logger, + log: &Logger, + identifiers: SledIdentifiers, + address: Ipv6Addr, ) -> Result { - let producer_server = - start_producer_server(&log, sled_id, sled_address)?; - let kstat_sampler = KstatSampler::new(&log).map_err(Error::Kstat)?; - producer_server + let sampler = KstatSampler::new(log).map_err(Error::Kstat)?; + let server = start_producer_server(&log, identifiers.sled_id, address)?; + server .registry() - .register_producer(kstat_sampler.clone()) - .map_err(Error::Registry)?; - let tracked_links = Arc::new(Mutex::new(BTreeMap::new())); - Ok(Self { - metadata: Arc::new(SledIdentifiers { sled_id, rack_id, baseboard }), - _log: log, - kstat_sampler, - tracked_links, - producer_server, - }) + .register_producer(sampler.clone()) + .expect("actually infallible"); + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + let task_log = log.new(o!("component" => "metrics-task")); + let _task = tokio::task::spawn(metrics_task( + identifiers, + sampler, + server, + task_log, + rx, + )); + Ok(Self { tx, _task }) } - /// Return a reference to the contained producer registry. - pub fn registry(&self) -> &ProducerRegistry { - self.producer_server.registry() + /// Return a queue that can be used to send requests to the metrics task. + pub fn request_queue(&self) -> MetricsRequestQueue { + MetricsRequestQueue(self.tx.clone()) + } +} + +/// A cheap handle used to send requests to the metrics task. +#[derive(Clone, Debug)] +pub struct MetricsRequestQueue(mpsc::Sender); + +impl MetricsRequestQueue { + #[cfg(test)] + #[allow(dead_code)] + /// Return both halves of the queue used to send messages to the collection + /// task, for use in testing. + pub(crate) fn for_test() -> (Self, mpsc::Receiver) { + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + (Self(tx), rx) + } + + /// Ask the task to start tracking the named physical datalink. + pub async fn track_physical(&self, name: impl Into) { + let _ = self.0.send(Message::TrackPhysical { name: name.into() }).await; + } + + /// Ask the task to start tracking the named VNIC. + pub async fn track_vnic(&self, name: impl Into) { + let _ = self.0.send(Message::TrackVnic { name: name.into() }).await; + } + + /// Ask the task to stop tracking the named VNIC. + pub async fn untrack_vnic(&self, name: impl Into) { + let _ = self.0.send(Message::UntrackVnic { name: name.into() }).await; + } + + /// Ask the task to start tracking the named OPTE port. + pub async fn track_opte_port(&self, name: impl Into) { + let _ = self.0.send(Message::TrackOptePort { name: name.into() }).await; + } + + /// Ask the task to stop tracking the named OPTE port. + pub async fn untrack_opte_port(&self, name: impl Into) { + let _ = + self.0.send(Message::UntrackOptePort { name: name.into() }).await; } } @@ -130,9 +353,8 @@ fn start_producer_server( log: &Logger, sled_id: Uuid, sled_address: Ipv6Addr, -) -> Result, Error> { +) -> Result { let log = log.new(slog::o!("component" => "producer-server")); - let registry = ProducerRegistry::with_id(sled_id); // Listen on any available socket, using our underlay address. let address = SocketAddr::new(sled_address.into(), 0); @@ -141,7 +363,7 @@ fn start_producer_server( let registration_address = None; let config = oximeter_producer::Config { server_info: ProducerEndpoint { - id: registry.producer_id(), + id: sled_id, kind: ProducerKind::SledAgent, address, interval: METRIC_COLLECTION_INTERVAL, @@ -150,84 +372,5 @@ fn start_producer_server( request_body_max_bytes: METRIC_REQUEST_MAX_SIZE, log: LogConfig::Logger(log), }; - ProducerServer::start(&config).map(Arc::new).map_err(Error::ProducerServer) -} - -impl MetricsManager { - /// Track metrics for a physical datalink. - pub async fn track_physical_link( - &self, - link_name: impl AsRef, - interval: Duration, - ) -> Result<(), Error> { - let hostname = hostname()?; - let link = link::physical_data_link::PhysicalDataLink { - rack_id: self.metadata.rack_id, - sled_id: self.metadata.sled_id, - serial: self.serial_number().into(), - hostname: hostname.into(), - link_name: link_name.as_ref().to_string().into(), - }; - let details = CollectionDetails::never(interval); - let id = self - .kstat_sampler - .add_target(link, details) - .await - .map_err(Error::Kstat)?; - self.tracked_links - .lock() - .unwrap() - .insert(link_name.as_ref().to_string(), id); - Ok(()) - } - - /// Stop tracking metrics for a datalink. - /// - /// This works for both physical and virtual links. - #[allow(dead_code)] - pub async fn stop_tracking_link( - &self, - link_name: impl AsRef, - ) -> Result<(), Error> { - let maybe_id = - self.tracked_links.lock().unwrap().remove(link_name.as_ref()); - if let Some(id) = maybe_id { - self.kstat_sampler.remove_target(id).await.map_err(Error::Kstat) - } else { - Ok(()) - } - } - - // Return the serial number out of the baseboard, if one exists. - fn serial_number(&self) -> String { - match &self.metadata.baseboard { - Baseboard::Gimlet { identifier, .. } => identifier.clone(), - Baseboard::Unknown => String::from("unknown"), - Baseboard::Pc { identifier, .. } => identifier.clone(), - } - } -} - -// Return the current hostname if possible. -fn hostname() -> Result { - // See netdb.h - const MAX_LEN: usize = 256; - let mut out = vec![0u8; MAX_LEN + 1]; - if unsafe { - libc::gethostname(out.as_mut_ptr() as *mut libc::c_char, MAX_LEN) - } == 0 - { - // Split into subslices by NULL bytes. - // - // We should have a NULL byte, since we've asked for no more than 255 - // bytes in a 256 byte buffer, but you never know. - let Some(chunk) = out.split(|x| *x == 0).next() else { - return Err(Error::HostnameMissingNull); - }; - let s = std::ffi::CString::new(chunk) - .map_err(|_| Error::NonUtf8Hostname)?; - s.into_string().map_err(|_| Error::NonUtf8Hostname) - } else { - Err(std::io::Error::last_os_error()).map_err(|_| Error::NonUtf8Hostname) - } + ProducerServer::start(&config).map_err(Error::ProducerServer) } diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 9451484f21b..aa118dfbe47 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -1,3 +1,4 @@ +use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClientWithResolver; use anyhow::{anyhow, Result}; use illumos_utils::dladm::Etherstub; @@ -59,6 +60,7 @@ pub(crate) struct ProbeManagerInner { vnic_allocator: VnicAllocator, storage: StorageHandle, port_manager: PortManager, + metrics_queue: MetricsRequestQueue, running_probes: Mutex, } @@ -69,6 +71,7 @@ impl ProbeManager { etherstub: Etherstub, storage: StorageHandle, port_manager: PortManager, + metrics_queue: MetricsRequestQueue, log: Logger, ) -> Self { Self { @@ -87,6 +90,7 @@ impl ProbeManager { sled_id, storage, port_manager, + metrics_queue, }), } } @@ -345,6 +349,13 @@ impl ProbeManagerInner { rz.ensure_address_for_port("overlay", 0).await?; info!(self.log, "started probe {}", probe.id); + // Notify the sled-agent's metrics task to start tracking the VNIC and + // any OPTE ports in the zone. + self.metrics_queue.track_vnic(rz.control_vnic_name()).await; + for port in rz.opte_port_names() { + self.metrics_queue.track_opte_port(port).await; + } + self.running_probes.lock().await.zones.insert(probe.id, rz); Ok(()) @@ -375,12 +386,26 @@ impl ProbeManagerInner { ) { match probes.zones.remove(&id) { Some(mut running_zone) => { + // TODO-correctness: There are no physical links in the zone, is + // this intended to delete the control VNIC? for l in running_zone.links_mut() { if let Err(e) = l.delete() { error!(self.log, "delete probe link {}: {e}", l.name()); } } + + // Ask the sled-agent to stop tracking our control VNIC. + self.metrics_queue + .untrack_vnic(running_zone.control_vnic_name()) + .await; + + // Ask the sled-agent to stop tracking our OPTE ports, then + // delete them entirely. + for port in running_zone.opte_port_names() { + self.metrics_queue.untrack_opte_port(port).await; + } running_zone.release_opte_ports(); + if let Err(e) = running_zone.stop().await { error!(self.log, "stop probe: {e}") } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 42466ce094b..6a350067548 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -30,6 +30,7 @@ use crate::bootstrap::early_networking::{ }; use crate::bootstrap::BootstrapNetworking; use crate::config::SidecarRevision; +use crate::metrics::MetricsRequestQueue; use crate::params::{ DendriteAsic, OmicronZoneConfigExt, OmicronZoneTypeExt, TimeSync, ZoneBundleCause, ZoneBundleMetadata, @@ -678,6 +679,7 @@ struct SledAgentInfo { underlay_address: Ipv6Addr, rack_id: Uuid, rack_network_config: Option, + metrics_queue: MetricsRequestQueue, } pub(crate) enum TimeSyncConfig { @@ -897,13 +899,14 @@ impl ServiceManager { /// Sets up "Sled Agent" information, including underlay info. /// /// Any subsequent calls after the first invocation return an error. - pub fn sled_agent_started( + pub async fn sled_agent_started( &self, config: Config, port_manager: PortManager, underlay_address: Ipv6Addr, rack_id: Uuid, rack_network_config: Option, + metrics_queue: MetricsRequestQueue, ) -> Result<(), Error> { info!(&self.inner.log, "sled agent started"; "underlay_address" => underlay_address.to_string()); self.inner @@ -918,10 +921,25 @@ impl ServiceManager { underlay_address, rack_id, rack_network_config, + metrics_queue: metrics_queue.clone(), }) .map_err(|_| "already set".to_string()) .expect("Sled Agent should only start once"); + // At this point, we've already started up the switch zone, but the + // VNICs inside it cannot have been tracked by the sled-agent's metrics + // task (the sled-agent didn't exist at the time we started the switch + // zone!). Notify that task about the zone's VNICs now. + if let SwitchZoneState::Running { zone, .. } = + &*self.inner.switch_zone.lock().await + { + if let Some(name) = zone.bootstrap_vnic_name() { + metrics_queue.track_vnic(name).await; + } + metrics_queue.track_vnic(zone.control_vnic_name()).await; + // There are no OPTE ports in the switch zone. + } + Ok(()) } @@ -1510,7 +1528,7 @@ impl ServiceManager { ServiceBuilder::new("network/dns/client") .add_instance(ServiceInstanceBuilder::new("default")); - match &request { + let running_zone = match &request { ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: OmicronZoneConfig { @@ -1561,7 +1579,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup clickhouse profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1617,7 +1635,7 @@ impl ServiceManager { err, ) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1691,7 +1709,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup CRDB profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1749,7 +1767,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup crucible profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1797,8 +1815,7 @@ impl ServiceManager { .add_to_zone(&self.inner.log, &installed_zone) .await .map_err(|err| Error::io("crucible pantry profile", err))?; - let running_zone = RunningZone::boot(installed_zone).await?; - return Ok(running_zone); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -1846,7 +1863,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Oximeter profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -1912,7 +1929,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup External DNS profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -2023,7 +2040,7 @@ impl ServiceManager { Error::io("Failed to set up NTP profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -2103,7 +2120,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Internal DNS profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -2251,7 +2268,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Nexus profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Switch(SwitchZoneConfigLocal { zone: SwitchZoneConfig { id, services, addresses }, @@ -2950,9 +2967,33 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Switch zone profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? + } + }; + + // Now that we've booted the zone, we'll notify the sled-agent about: + // + // - Its control VNIC (all zones have one) + // - Any bootstrap network VNIC (only the switch zone has one) + // - Any OPTE ports (instance zones, or Oxide zones with external + // connectivity). + // + // Note that we'll almost always have started the sled-agent at this + // point. The only exception is the switch zone, during bootstrapping + // but before we've either run RSS or unlocked the rack. In both those + // cases, we have a `StartSledAgentRequest`, and so a metrics queue. + if let Some(queue) = + self.inner.sled_info.get().map(|sa| &sa.metrics_queue) + { + queue.track_vnic(running_zone.control_vnic_name()).await; + if let Some(bootstrap_vnic) = running_zone.bootstrap_vnic_name() { + queue.track_vnic(bootstrap_vnic).await; + } + for port in running_zone.opte_port_names() { + queue.track_opte_port(port).await; } } + Ok(running_zone) } // Ensures that a single Omicron zone is running. @@ -3317,6 +3358,16 @@ impl ServiceManager { ); return; }; + // Ensure that the sled agent's metrics task is not tracking the zone's + // VNICs or OPTE ports. + if let Some(queue) = + self.inner.sled_info.get().map(|sa| &sa.metrics_queue) + { + queue.untrack_vnic(zone.runtime.control_vnic_name()).await; + for port in zone.runtime.opte_port_names() { + queue.untrack_opte_port(port).await; + } + } debug!( log, "removing an existing zone"; @@ -3354,6 +3405,12 @@ impl ServiceManager { "zone" => &zone_name, "state" => ?zone.state(), ); + // NOTE: We might want to tell the sled-agent's metrics task to + // stop tracking any links in this zone. However, we don't have + // very easy access to them, without running a command in the + // zone. These links are about to be deleted, and the metrics + // task will expire them after a while anyway, but it might be + // worth the trouble to do that in the future. if let Err(e) = Zones::halt_and_remove_logged(&self.inner.log, &zone_name) .await @@ -4237,6 +4294,18 @@ impl ServiceManager { } (SwitchZoneState::Running { zone, .. }, None) => { info!(log, "Disabling {zone_typestr} zone (was running)"); + + // Notify the sled-agent's metrics task to stop collecting from + // the VNICs in the zone (if the agent exists). + if let Some(queue) = + self.inner.sled_info.get().map(|sa| &sa.metrics_queue) + { + if let Some(name) = zone.bootstrap_vnic_name() { + queue.untrack_vnic(name).await; + } + queue.untrack_vnic(zone.control_vnic_name()).await; + } + let _ = zone.stop().await; *sled_zone = SwitchZoneState::Disabled; } @@ -4309,6 +4378,8 @@ impl ServiceManager { #[cfg(all(test, target_os = "illumos"))] mod test { + use crate::metrics; + use super::*; use illumos_utils::{ dladm::{ @@ -4320,8 +4391,12 @@ mod test { }; use sled_storage::manager_test_harness::StorageManagerTestHarness; - use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; + use std::{ + net::{Ipv6Addr, SocketAddrV6}, + time::Duration, + }; + use tokio::sync::mpsc::error::TryRecvError; use uuid::Uuid; // Just placeholders. Not used. @@ -4331,6 +4406,10 @@ mod test { const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_ntp"; const EXPECTED_PORT: u16 = 12223; + // Timeout within which we must have received a message about a zone's links + // to track. This is very generous. + const LINK_NOTIFICATION_TIMEOUT: Duration = Duration::from_secs(5); + fn make_bootstrap_networking_config() -> BootstrapNetworking { BootstrapNetworking { bootstrap_etherstub: Etherstub( @@ -4596,7 +4675,7 @@ mod test { // // This will shut down all allocated zones, and delete their // associated VNICs. - fn drop_service_manager(mgr: ServiceManager) { + async fn drop_service_manager(mgr: ServiceManager) { let halt_ctx = MockZones::halt_and_remove_logged_context(); halt_ctx.expect().returning(|_, name| { assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); @@ -4605,6 +4684,16 @@ mod test { let delete_vnic_ctx = MockDladm::delete_vnic_context(); delete_vnic_ctx.expect().returning(|_| Ok(())); + // Also send a message to the metrics task that the VNIC has been + // deleted. + mgr.inner + .sled_info + .get() + .unwrap() + .metrics_queue + .untrack_vnic("oxControlService0") + .await; + // Explicitly drop the service manager drop(mgr); } @@ -4715,10 +4804,11 @@ mod test { mgr } - fn sled_agent_started( + async fn sled_agent_started( log: &slog::Logger, test_config: &TestConfig, mgr: &ServiceManager, + metrics_queue: MetricsRequestQueue, ) { let port_manager = PortManager::new( log.new(o!("component" => "PortManager")), @@ -4733,7 +4823,9 @@ mod test { Ipv6Addr::LOCALHOST, Uuid::new_v4(), None, + metrics_queue, ) + .await .unwrap(); } } @@ -4746,7 +4838,14 @@ mod test { let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v1 = Generation::new(); let found = @@ -4770,7 +4869,23 @@ mod test { assert_eq!(found.zones.len(), 1); assert_eq!(found.zones[0].id, id); - drop_service_manager(mgr); + // Check that we received a message about the zone's VNIC. + let message = tokio::time::timeout( + LINK_NOTIFICATION_TIMEOUT, + metrics_rx.recv(), + ) + .await + .expect( + "Should have received a message about the zone's VNIC within the timeout" + ) + .expect("Should have received a message about the zone's VNIC"); + assert_eq!( + message, + metrics::Message::TrackVnic { name: "oxControlService0".into() }, + ); + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4787,7 +4902,14 @@ mod test { let mgr = helper.new_service_manager_with_timesync(TimeSyncConfig::Fail); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v1 = Generation::new(); let found = @@ -4822,6 +4944,10 @@ mod test { Error::TimeNotSynchronized ); + // Ensure we have _not_ received a message about the zone's VNIC, + // because there isn't a zone. + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + // Next, ensure this still converts to an "unavail" common error let common_err = omicron_common::api::external::Error::from(err); assert_matches::assert_matches!( @@ -4846,7 +4972,7 @@ mod test { .await .unwrap(); - drop_service_manager(mgr); + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); } @@ -4860,7 +4986,14 @@ mod test { let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v2 = Generation::new().next(); let id = Uuid::new_v4(); @@ -4874,7 +5007,25 @@ mod test { assert_eq!(found.zones.len(), 1); assert_eq!(found.zones[0].id, id); - drop_service_manager(mgr); + // In this case, the manager creates the zone once, and then "ensuring" + // it a second time is a no-op. So we simply expect the same message + // sequence as starting a zone for the first time. + let message = tokio::time::timeout( + LINK_NOTIFICATION_TIMEOUT, + metrics_rx.recv(), + ) + .await + .expect( + "Should have received a message about the zone's VNIC within the timeout" + ) + .expect("Should have received a message about the zone's VNIC"); + assert_eq!( + message, + metrics::Message::TrackVnic { name: "oxControlService0".into() }, + ); + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4892,7 +5043,14 @@ mod test { // First, spin up a ServiceManager, create a new zone, and then tear // down the ServiceManager. let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v2 = Generation::new().next(); let id = Uuid::new_v4(); @@ -4903,13 +5061,43 @@ mod test { String::from(test_config.config_dir.path().as_str()), ) .await; - drop_service_manager(mgr); + drop_service_manager(mgr).await; + + // Check that we received a message about the zone's VNIC. Since the + // manager is being dropped, it should also send a message about the + // VNIC being deleted. + for expected_message in [ + metrics::Message::TrackVnic { name: "oxControlService0".into() }, + metrics::Message::UntrackVnic { name: "oxControlService0".into() }, + ] { + println!("Expecting message from manager: {expected_message:#?}"); + let message = tokio::time::timeout( + LINK_NOTIFICATION_TIMEOUT, + metrics_rx.recv(), + ) + .await + .expect( + "Should have received a message about the zone's VNIC within the timeout" + ) + .expect("Should have received a message about the zone's VNIC"); + assert_eq!(message, expected_message,); + } + // Note that the manager has been dropped, so we should get + // disconnected, not empty. + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Disconnected)); // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. let _expectations = expect_new_service(EXPECTED_ZONE_NAME_PREFIX); let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let found = @@ -4918,7 +5106,13 @@ mod test { assert_eq!(found.zones.len(), 1); assert_eq!(found.zones[0].id, id); - drop_service_manager(mgr); + // Note that the `omicron_zones_list()` request just returns the + // configured zones, stored in the on-disk ledger. There is nothing + // above that actually ensures that those zones exist, as far as I can + // tell! + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4936,7 +5130,14 @@ mod test { // First, spin up a ServiceManager, create a new zone, and then tear // down the ServiceManager. let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let metrics_handles = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_handles.0.clone(), + ) + .await; let v1 = Generation::new(); let v2 = v1.next(); @@ -4948,7 +5149,7 @@ mod test { String::from(test_config.config_dir.path().as_str()), ) .await; - drop_service_manager(mgr); + drop_service_manager(mgr).await; // Next, delete the ledger. This means the zone we just created will not // be remembered on the next initialization. @@ -4959,14 +5160,21 @@ mod test { // Observe that the old service is not re-initialized. let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let metrics_handles = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_handles.0.clone(), + ) + .await; let found = mgr.omicron_zones_list().await.expect("failed to list zones"); assert_eq!(found.generation, v1); assert!(found.zones.is_empty()); - drop_service_manager(mgr); + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4981,7 +5189,14 @@ mod test { let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let metrics_handles = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_handles.0.clone(), + ) + .await; // Like the normal tests, set up a generation with one zone in it. let v1 = Generation::new(); @@ -5086,7 +5301,7 @@ mod test { found_zones.sort_by(|a, b| a.id.cmp(&b.id)); assert_eq!(our_zones, found_zones); - drop_service_manager(mgr); + drop_service_manager(mgr).await; illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); helper.cleanup().await; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 92dcfd2dfe0..ddf40886782 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -63,7 +63,6 @@ use omicron_common::backoff::{ use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; -use oximeter::types::ProducerRegistry; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; @@ -336,7 +335,7 @@ struct SledAgentInner { bootstore: bootstore::NodeHandle, // Object handling production of metrics for oximeter. - metrics_manager: MetricsManager, + _metrics_manager: MetricsManager, // Handle to the traffic manager for writing OS updates to our boot disks. boot_disk_os_writer: BootDiskOsWriter, @@ -434,38 +433,20 @@ impl SledAgent { illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics)?; // Start collecting metric data. - // - // First, we're creating a shareable type for managing the metrics - // themselves early on, so that we can pass it to other components of - // the sled agent that need it. - // - // Then we'll start tracking physical links and register as a producer - // with Nexus in the background. - let metrics_manager = MetricsManager::new( - request.body.id, - request.body.rack_id, - long_running_task_handles.hardware_manager.baseboard(), - *sled_address.ip(), - log.new(o!("component" => "MetricsManager")), - )?; + let baseboard = long_running_task_handles.hardware_manager.baseboard(); + let identifiers = SledIdentifiers { + rack_id: request.body.rack_id, + sled_id: request.body.id, + model: baseboard.model().to_string(), + revision: baseboard.revision(), + serial: baseboard.identifier().to_string(), + }; + let metrics_manager = + MetricsManager::new(&log, identifiers, *sled_address.ip())?; // Start tracking the underlay physical links. - for nic in underlay::find_nics(&config.data_links)? { - let link_name = nic.interface(); - if let Err(e) = metrics_manager - .track_physical_link( - link_name, - crate::metrics::LINK_SAMPLE_INTERVAL, - ) - .await - { - error!( - log, - "failed to start tracking physical link metrics"; - "link_name" => link_name, - "error" => ?e, - ); - } + for link in underlay::find_chelsio_links(&config.data_links)? { + metrics_manager.request_queue().track_physical(&link.0).await; } // Create the PortManager to manage all the OPTE ports on the sled. @@ -496,6 +477,7 @@ impl SledAgent { long_running_task_handles.zone_bundler.clone(), ZoneBuilderFactory::default(), vmm_reservoir_manager.clone(), + metrics_manager.request_queue(), )?; let update_config = ConfigUpdates { @@ -551,13 +533,16 @@ impl SledAgent { network config from bootstore", ); - services.sled_agent_started( - svc_config, - port_manager.clone(), - *sled_address.ip(), - request.body.rack_id, - rack_network_config.clone(), - )?; + services + .sled_agent_started( + svc_config, + port_manager.clone(), + *sled_address.ip(), + request.body.rack_id, + rack_network_config.clone(), + metrics_manager.request_queue(), + ) + .await?; // Spawn a background task for managing notifications to nexus // about this sled-agent. @@ -581,6 +566,7 @@ impl SledAgent { etherstub.clone(), storage_manager.clone(), port_manager.clone(), + metrics_manager.request_queue(), log.new(o!("component" => "ProbeManager")), ); @@ -604,7 +590,7 @@ impl SledAgent { rack_network_config, zone_bundler: long_running_task_handles.zone_bundler.clone(), bootstore: long_running_task_handles.bootstore.clone(), - metrics_manager, + _metrics_manager: metrics_manager, boot_disk_os_writer: BootDiskOsWriter::new(&parent_log), }), log: log.clone(), @@ -1193,11 +1179,6 @@ impl SledAgent { self.inner.port_manager.vpc_routes_ensure(routes).map_err(Error::from) } - /// Return the metric producer registry. - pub fn metrics_registry(&self) -> &ProducerRegistry { - self.inner.metrics_manager.registry() - } - pub(crate) fn storage(&self) -> &StorageHandle { &self.inner.storage }