diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index 1e8a7e70bf..b93353eb18 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -17,11 +17,6 @@ pub const VNIC_PREFIX: &str = "ox"; pub const VNIC_PREFIX_CONTROL: &str = "oxControl"; pub const VNIC_PREFIX_BOOTSTRAP: &str = "oxBootstrap"; -/// Prefix used to name VNICs over xde devices / OPTE ports. -// TODO-correctness: Remove this when `xde` devices can be directly used beneath -// Viona, and thus plumbed directly to guests. -pub const VNIC_PREFIX_GUEST: &str = "vopte"; - /// Path to the DLADM command. pub const DLADM: &str = "/usr/sbin/dladm"; @@ -42,6 +37,9 @@ pub const BOOTSTRAP_ETHERSTUB_VNIC_NAME: &str = "bootstrap0"; /// The prefix for Chelsio link names. pub const CHELSIO_LINK_PREFIX: &str = "cxgbe"; +/// The prefix for OPTE link names +pub const OPTE_LINK_PREFIX: &str = "opte"; + /// Errors returned from [`Dladm::find_physical`]. #[derive(thiserror::Error, Debug)] pub enum FindPhysicalLinkError { diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs index 871ba55e75..5d0c80b0e1 100644 --- a/illumos-utils/src/link.rs +++ b/illumos-utils/src/link.rs @@ -7,7 +7,7 @@ use crate::destructor::{Deletable, Destructor}; use crate::dladm::{ CreateVnicError, DeleteVnicError, VnicSource, VNIC_PREFIX, - VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, + VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL, }; use omicron_common::api::external::MacAddr; use std::sync::{ @@ -125,7 +125,6 @@ impl VnicAllocator
{ pub enum LinkKind { Physical, OxideControlVnic, - GuestVnic, OxideBootstrapVnic, } @@ -135,14 +134,20 @@ impl LinkKind { pub fn from_name(name: &str) -> Option { if name.starts_with(VNIC_PREFIX) { Some(LinkKind::OxideControlVnic) - } else if name.starts_with(VNIC_PREFIX_GUEST) { - Some(LinkKind::GuestVnic) } else if name.starts_with(VNIC_PREFIX_BOOTSTRAP) { Some(LinkKind::OxideBootstrapVnic) } else { None } } + + /// Return `true` if this link is a VNIC. + pub const fn is_vnic(&self) -> bool { + match self { + LinkKind::Physical => false, + LinkKind::OxideControlVnic | LinkKind::OxideBootstrapVnic => true, + } + } } #[derive(thiserror::Error, Debug)] @@ -203,6 +208,11 @@ impl Link { pub fn kind(&self) -> LinkKind { self.kind } + + /// Return true if this is a VNIC. + pub fn is_vnic(&self) -> bool { + self.kind.is_vnic() + } } impl Drop for Link { diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index b6d28d1b06..93c646cfab 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -4,6 +4,7 @@ //! Manager for all OPTE ports on a Helios system +use crate::dladm::OPTE_LINK_PREFIX; use crate::opte::opte_firewall_rules; use crate::opte::params::VirtualNetworkInterfaceHost; use crate::opte::params::VpcFirewallRule; @@ -52,9 +53,6 @@ use std::sync::Arc; use std::sync::Mutex; use uuid::Uuid; -// Prefix used to identify xde data links. -const XDE_LINK_PREFIX: &str = "opte"; - /// Stored routes (and usage count) for a given VPC/subnet. #[derive(Debug, Clone)] struct RouteSet { @@ -85,7 +83,7 @@ impl PortManagerInner { fn next_port_name(&self) -> String { format!( "{}{}", - XDE_LINK_PREFIX, + OPTE_LINK_PREFIX, self.next_port_id.fetch_add(1, Ordering::SeqCst) ) } @@ -265,8 +263,9 @@ impl PortManager { // So we: // // - create the xde device - // - create the vnic, cleaning up the xde device if that fails - // - add both to the Port + // - create the port ticket + // - create the port + // - add both to the PortManager's map // // The Port object's drop implementation will clean up both of those, if // any of the remaining fallible operations fail. @@ -289,21 +288,6 @@ impl PortManager { )?; hdl }; - - // Initialize firewall rules for the new port. - let rules = opte_firewall_rules(firewall_rules, &vni, &mac); - debug!( - self.inner.log, - "Setting firewall rules"; - "port_name" => &port_name, - "rules" => ?&rules, - ); - #[cfg(target_os = "illumos")] - hdl.set_fw_rules(&oxide_vpc::api::SetFwRulesReq { - port_name: port_name.clone(), - rules, - })?; - let (port, ticket) = { let mut ports = self.inner.ports.lock().unwrap(); let ticket = PortTicket::new(nic.id, nic.kind, self.inner.clone()); @@ -326,6 +310,20 @@ impl PortManager { (port, ticket) }; + // Initialize firewall rules for the new port. + let rules = opte_firewall_rules(firewall_rules, &vni, &mac); + debug!( + self.inner.log, + "Setting firewall rules"; + "port_name" => &port_name, + "rules" => ?&rules, + ); + #[cfg(target_os = "illumos")] + hdl.set_fw_rules(&oxide_vpc::api::SetFwRulesReq { + port_name: port_name.clone(), + rules, + })?; + // Check locally to see whether we have any routes from the // control plane for this port already installed. If not, // create a record to show that we're interested in receiving diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 605809f019..5dbe4338cf 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -362,6 +362,22 @@ impl RunningZone { self.inner.zonepath.pool.as_ref() } + /// Return the name of a bootstrap VNIC in the zone, if any. + pub fn bootstrap_vnic_name(&self) -> Option<&str> { + self.inner.get_bootstrap_vnic_name() + } + + /// Return the name of the control VNIC. + pub fn control_vnic_name(&self) -> &str { + self.inner.get_control_vnic_name() + } + + /// Return the names of any OPTE ports in the zone. + pub fn opte_port_names(&self) -> impl Iterator { + self.inner.opte_ports().map(|port| port.name()) + } + + /// Return the control IP address. pub fn control_interface(&self) -> AddrObject { AddrObject::new( self.inner.get_control_vnic_name(), @@ -939,10 +955,17 @@ impl InstalledZone { zone_name } + /// Get the name of the bootstrap VNIC in the zone, if any. + pub fn get_bootstrap_vnic_name(&self) -> Option<&str> { + self.bootstrap_vnic.as_ref().map(|link| link.name()) + } + + /// Get the name of the control VNIC in the zone. pub fn get_control_vnic_name(&self) -> &str { self.control_vnic.name() } + /// Return the name of the zone itself. pub fn name(&self) -> &str { &self.name } diff --git a/oximeter/db/schema/replicated/8/timeseries-to-delete.txt b/oximeter/db/schema/replicated/8/timeseries-to-delete.txt new file mode 100644 index 0000000000..969e18f72d --- /dev/null +++ b/oximeter/db/schema/replicated/8/timeseries-to-delete.txt @@ -0,0 +1,6 @@ +physical_data_link:bytes_received +physical_data_link:bytes_sent +physical_data_link:errors_received +physical_data_link:errors_sent +physical_data_link:packets_received +physical_data_link:packets_sent diff --git a/oximeter/db/schema/single-node/8/timeseries-to-delete.txt b/oximeter/db/schema/single-node/8/timeseries-to-delete.txt new file mode 100644 index 0000000000..969e18f72d --- /dev/null +++ b/oximeter/db/schema/single-node/8/timeseries-to-delete.txt @@ -0,0 +1,6 @@ +physical_data_link:bytes_received +physical_data_link:bytes_sent +physical_data_link:errors_received +physical_data_link:errors_sent +physical_data_link:packets_received +physical_data_link:packets_sent diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 878017797a..05667058b5 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 7; +pub const OXIMETER_VERSION: u64 = 8; // Wrapper type to represent a boolean in the database. // diff --git a/oximeter/instruments/src/kstat/link.rs b/oximeter/instruments/src/kstat/link.rs index 0507594056..0317b43839 100644 --- a/oximeter/instruments/src/kstat/link.rs +++ b/oximeter/instruments/src/kstat/link.rs @@ -17,7 +17,7 @@ use kstat_rs::Named; use oximeter::types::Cumulative; use oximeter::Sample; -oximeter::use_timeseries!("physical-data-link.toml"); +oximeter::use_timeseries!("sled-data-link.toml"); // Helper function to extract the same kstat metrics from all link targets. fn extract_link_kstats( @@ -32,7 +32,7 @@ where let Named { name, value } = named_data; if *name == "rbytes64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::BytesReceived { + let metric = sled_data_link::BytesReceived { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -40,7 +40,7 @@ where })) } else if *name == "obytes64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::BytesSent { + let metric = sled_data_link::BytesSent { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -48,7 +48,7 @@ where })) } else if *name == "ipackets64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::PacketsReceived { + let metric = sled_data_link::PacketsReceived { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -56,7 +56,7 @@ where })) } else if *name == "opackets64" { Some(value.as_u64().and_then(|x| { - let metric = physical_data_link::PacketsSent { + let metric = sled_data_link::PacketsSent { datum: Cumulative::with_start_time(creation_time, x), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -64,7 +64,7 @@ where })) } else if *name == "ierrors" { Some(value.as_u32().and_then(|x| { - let metric = physical_data_link::ErrorsReceived { + let metric = sled_data_link::ErrorsReceived { datum: Cumulative::with_start_time(creation_time, x.into()), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -72,7 +72,7 @@ where })) } else if *name == "oerrors" { Some(value.as_u32().and_then(|x| { - let metric = physical_data_link::ErrorsSent { + let metric = sled_data_link::ErrorsSent { datum: Cumulative::with_start_time(creation_time, x.into()), }; Sample::new_with_timestamp(snapshot_time, target, &metric) @@ -88,7 +88,7 @@ trait LinkKstatTarget: KstatTarget { fn link_name(&self) -> &str; } -impl LinkKstatTarget for physical_data_link::PhysicalDataLink { +impl LinkKstatTarget for sled_data_link::SledDataLink { fn link_name(&self) -> &str { &self.link_name } @@ -155,6 +155,11 @@ mod tests { const RACK_ID: Uuid = uuid!("de784702-cafb-41a9-b3e5-93af189def29"); const SLED_ID: Uuid = uuid!("88240343-5262-45f4-86f1-3c82fe383f2a"); + const SLED_MODEL: &str = "fake-gimlet"; + const SLED_REVISION: u32 = 1; + const SLED_SERIAL: &str = "fake-serial"; + const KIND: &str = "etherstub"; + const ZONE_NAME: &str = "global"; // An etherstub we can use for testing. // @@ -221,15 +226,17 @@ mod tests { } #[test] - fn test_physical_datalink() { + fn test_sled_datalink() { let link = TestEtherstub::new(); - let sn = String::from("BRM000001"); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), + sled_serial: SLED_SERIAL.into(), link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, + zone_name: ZONE_NAME.into(), }; let ctl = Ctl::new().unwrap(); let ctl = ctl.update().unwrap(); @@ -246,14 +253,16 @@ mod tests { #[tokio::test] async fn test_kstat_sampler() { let mut sampler = KstatSampler::new(&test_logger()).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), + sled_serial: SLED_SERIAL.into(), link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, + zone_name: ZONE_NAME.into(), }; let details = CollectionDetails::never(Duration::from_secs(1)); let id = sampler.add_target(dl, details).await.unwrap(); @@ -294,14 +303,16 @@ mod tests { let limit = 2; let mut sampler = KstatSampler::with_sample_limit(&test_logger(), limit).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, + zone_name: ZONE_NAME.into(), }; let details = CollectionDetails::never(Duration::from_secs(1)); sampler.add_target(dl, details).await.unwrap(); @@ -328,7 +339,7 @@ mod tests { let samples: Vec<_> = sampler.produce().unwrap().collect(); let (link_samples, dropped_samples): (Vec<_>, Vec<_>) = samples .iter() - .partition(|s| s.timeseries_name.contains("physical_data_link")); + .partition(|s| s.timeseries_name.contains("sled_data_link")); println!("{link_samples:#?}"); assert_eq!(link_samples.len(), limit); @@ -360,15 +371,17 @@ mod tests { // make sure we expire after the expected period. let log = test_logger(); let mut sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, + zone_name: ZONE_NAME.into(), }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); @@ -417,15 +430,17 @@ mod tests { async fn test_kstat_start_time_is_equal() { let log = test_logger(); let mut sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, + zone_name: ZONE_NAME.into(), }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); @@ -444,7 +459,7 @@ mod tests { let samples = sampler.produce().unwrap(); let mut start_times = samples .filter(|sample| { - sample.timeseries_name.as_str().starts_with("physical") + sample.timeseries_name.as_str().starts_with("sled") }) .map(|sample| sample.measurement.start_time().unwrap()); let first = start_times.next().unwrap(); @@ -461,7 +476,6 @@ mod tests { // make sure the creation times are pruned. let log = test_logger(); let sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); let path = KstatPath { module: "link".to_string(), @@ -469,12 +483,15 @@ mod tests { name: link.name.clone(), }; info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, + zone_name: ZONE_NAME.into(), }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); @@ -508,7 +525,6 @@ mod tests { // make sure the creation times are pruned. let log = test_logger(); let sampler = KstatSampler::new(&log).unwrap(); - let sn = String::from("BRM000001"); let link = TestEtherstub::new(); let path = KstatPath { module: "link".to_string(), @@ -516,12 +532,15 @@ mod tests { name: link.name.clone(), }; info!(log, "created test etherstub"; "name" => &link.name); - let dl = physical_data_link::PhysicalDataLink { + let dl = sled_data_link::SledDataLink { rack_id: RACK_ID, sled_id: SLED_ID, - serial: sn.clone().into(), - hostname: sn.into(), - link_name: link.name.to_string().into(), + sled_serial: SLED_SERIAL.into(), + link_name: link.name.clone().into(), + kind: KIND.into(), + sled_model: SLED_MODEL.into(), + sled_revision: SLED_REVISION, + zone_name: ZONE_NAME.into(), }; let collection_interval = Duration::from_secs(1); let expiry = Duration::from_secs(1); diff --git a/oximeter/oximeter/schema/physical-data-link.toml b/oximeter/oximeter/schema/sled-data-link.toml similarity index 67% rename from oximeter/oximeter/schema/physical-data-link.toml rename to oximeter/oximeter/schema/sled-data-link.toml index d526aa6af1..40a294fc39 100644 --- a/oximeter/oximeter/schema/physical-data-link.toml +++ b/oximeter/oximeter/schema/sled-data-link.toml @@ -1,44 +1,44 @@ format_version = 1 [target] -name = "physical_data_link" -description = "A physical network link on a compute sled" +name = "sled_data_link" +description = "A network data link on a compute sled" authz_scope = "fleet" versions = [ - { version = 1, fields = [ "rack_id", "sled_id", "hostname", "serial", "link_name" ] }, - # This is the intended next version, but actual schema updates are not yet - # supported. This is left here as an example and breadcrumb to implement - # that update in the future. - #{ version = 2, fields = [ "rack_id", "sled_id", "serial", "model", "revision", "link_name" ] }, + { version = 1, fields = [ "kind", "link_name", "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "zone_name" ] }, ] +[fields.kind] +type = "string" +description = "The kind or class of the data link" + +[fields.link_name] +type = "string" +description = "Name of the data link" + [fields.rack_id] type = "uuid" -description = "UUID for the link's sled" +description = "ID for the link's rack" [fields.sled_id] type = "uuid" -description = "UUID for the link's sled" - -[fields.hostname] -type = "string" -description = "Hostname of the link's sled" +description = "ID for the link's sled" -[fields.model] +[fields.sled_model] type = "string" description = "Model number of the link's sled" -[fields.revision] +[fields.sled_revision] type = "u32" description = "Revision number of the sled" -[fields.serial] +[fields.sled_serial] type = "string" description = "Serial number of the sled" -[fields.link_name] +[fields.zone_name] type = "string" -description = "Name of the physical data link" +description = "Name of the zone owning the link" [[metrics]] name = "bytes_sent" diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index b56f5d9df0..b038259c0a 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -11,6 +11,7 @@ use crate::common::instance::{ use crate::instance_manager::{ Error as ManagerError, InstanceManagerServices, InstanceTicket, }; +use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClientWithResolver; use crate::params::ZoneBundleMetadata; use crate::params::{InstanceExternalIpBody, ZoneBundleCause}; @@ -364,6 +365,9 @@ struct InstanceRunner { // Object used to collect zone bundles from this instance when terminated. zone_bundler: ZoneBundler, + // Queue to notify the sled agent's metrics task about our VNICs. + metrics_queue: MetricsRequestQueue, + // Object representing membership in the "instance manager". instance_ticket: InstanceTicket, } @@ -806,6 +810,12 @@ impl InstanceRunner { return; }; + // Ask the sled-agent's metrics task to stop tracking statistics for our + // control VNIC and any OPTE ports in the zone as well. + self.metrics_queue + .untrack_zone_links(&running_state.running_zone) + .await; + // Take a zone bundle whenever this instance stops. if let Err(e) = self .zone_bundler @@ -1008,6 +1018,7 @@ impl Instance { storage, zone_bundler, zone_builder_factory, + metrics_queue, } = services; let mut dhcp_config = DhcpCfg { @@ -1097,6 +1108,7 @@ impl Instance { storage, zone_builder_factory, zone_bundler, + metrics_queue, instance_ticket: ticket, }; @@ -1395,8 +1407,11 @@ impl InstanceRunner { } async fn setup_propolis_inner(&mut self) -> Result { - // Create OPTE ports for the instance + // Create OPTE ports for the instance. We also store the names of all + // those ports to notify the metrics task to start collecting statistics + // for them. let mut opte_ports = Vec::with_capacity(self.requested_nics.len()); + let mut opte_port_names = Vec::with_capacity(self.requested_nics.len()); for nic in self.requested_nics.iter() { let (snat, ephemeral_ip, floating_ips) = if nic.primary { ( @@ -1416,6 +1431,7 @@ impl InstanceRunner { dhcp_config: self.dhcp_config.clone(), is_service: false, })?; + opte_port_names.push(port.0.name().to_string()); opte_ports.push(port); } @@ -1498,6 +1514,9 @@ impl InstanceRunner { .map_err(|_| Error::Timeout(fmri.to_string()))?; info!(self.log, "Propolis SMF service is online"); + // Notify the metrics task about the instance zone's datalinks. + self.metrics_queue.track_zone_links(&running_zone).await; + // We use a custom client builder here because the default progenitor // one has a timeout of 15s but we want to be able to wait indefinitely. let reqwest_client = reqwest::ClientBuilder::new().build().unwrap(); @@ -1593,6 +1612,7 @@ impl InstanceRunner { mod tests { use super::*; use crate::fakes::nexus::{FakeNexusServer, ServerContext}; + use crate::metrics; use crate::vmm_reservoir::VmmReservoirManagerHandle; use crate::zone_bundle::CleanupContext; use camino_tempfile::Utf8TempDir; @@ -1617,12 +1637,18 @@ mod tests { use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::str::FromStr; + use tokio::sync::mpsc; use tokio::sync::watch::Receiver; use tokio::time::timeout; const TIMEOUT_DURATION: tokio::time::Duration = tokio::time::Duration::from_secs(30); + // Make the Propolis ID const, so we can refer to it in tests that check the + // zone name is included in requests to track the zone's links. + const PROPOLIS_ID: Uuid = + uuid::uuid!("e8e95a60-2aaf-4453-90e4-e0e58f126762"); + #[derive(Default, Clone)] enum ReceivedInstanceState { #[default] @@ -1780,16 +1806,16 @@ mod tests { nexus_client_with_resolver: NexusClientWithResolver, storage_handle: StorageHandle, temp_dir: &String, - ) -> Instance { + ) -> (Instance, MetricsRx) { let id = InstanceUuid::new_v4(); - let propolis_id = PropolisUuid::new_v4(); + let propolis_id = PropolisUuid::from_untyped_uuid(PROPOLIS_ID); let ticket = InstanceTicket::new_without_manager_for_test(id); let initial_state = fake_instance_initial_state(propolis_id, propolis_addr); - let services = fake_instance_manager_services( + let (services, rx) = fake_instance_manager_services( log, storage_handle, nexus_client_with_resolver, @@ -1808,7 +1834,7 @@ mod tests { serial: "fake-serial".into(), }; - Instance::new( + let instance = Instance::new( log.new(o!("component" => "Instance")), id, propolis_id, @@ -1818,7 +1844,8 @@ mod tests { sled_identifiers, metadata, ) - .unwrap() + .unwrap(); + (instance, rx) } fn fake_instance_initial_state( @@ -1868,12 +1895,15 @@ mod tests { } } + // Helper alias for the receive-side of the metrics request queue. + type MetricsRx = mpsc::Receiver; + fn fake_instance_manager_services( log: &Logger, storage_handle: StorageHandle, nexus_client_with_resolver: NexusClientWithResolver, temp_dir: &String, - ) -> InstanceManagerServices { + ) -> (InstanceManagerServices, MetricsRx) { let vnic_allocator = VnicAllocator::new("Foo", Etherstub("mystub".to_string())); let port_manager = PortManager::new( @@ -1888,14 +1918,17 @@ mod tests { cleanup_context, ); - InstanceManagerServices { + let (metrics_queue, rx) = MetricsRequestQueue::for_test(); + let services = InstanceManagerServices { nexus_client: nexus_client_with_resolver, vnic_allocator, port_manager, storage: storage_handle, zone_bundler, zone_builder_factory: ZoneBuilderFactory::fake(Some(temp_dir)), - } + metrics_queue, + }; + (services, rx) } #[tokio::test] @@ -1925,7 +1958,7 @@ mod tests { let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); - let inst = timeout( + let (inst, mut metrics_rx) = timeout( TIMEOUT_DURATION, instance_struct( &log, @@ -1968,6 +2001,25 @@ mod tests { .expect("timed out waiting for InstanceState::Running in FakeNexus") .expect("failed to receive FakeNexus' InstanceState"); + // We should have received exactly one message on the metrics request + // queue, for the control VNIC. The instance has no OPTE ports. + let message = + metrics_rx.try_recv().expect("Should have received a message"); + let zone_name = + propolis_zone_name(&PropolisUuid::from_untyped_uuid(PROPOLIS_ID)); + assert_eq!( + message, + metrics::Message::TrackVnic { + zone_name, + name: "oxControlFoo0".into(), + }, + "Expected instance zone to send a message on its metrics \ + request queue, asking to track its control VNIC", + ); + metrics_rx + .try_recv() + .expect_err("The metrics request queue should have one message"); + storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -1997,7 +2049,7 @@ mod tests { let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); - let inst = timeout( + let (inst, _) = timeout( TIMEOUT_DURATION, instance_struct( &log, @@ -2055,17 +2107,9 @@ mod tests { // time out while booting zone, on purpose! let boot_ctx = MockZones::boot_context(); - let start = tokio::time::Instant::now(); boot_ctx.expect().times(1).return_once(move |_| { - // We need something that will look like the zone taking a long time - // to boot, but we cannot use a `tokio::time` construct here since - // this is a blocking context and we cannot call `block_on()` - // recursively. We advance time by this amount below, so this will - // most likely result in a small number of additional sleeps until - // the timeout has really elased. - while start.elapsed() < TIMEOUT_DURATION * 2 { - std::thread::sleep(std::time::Duration::from_millis(1)); - } + // See below, there's no way to have this take a long time, other + // than just taking a long time _and_ blocking the rest of the test. Ok(()) }); let wait_ctx = illumos_utils::svc::wait_for_service_context(); @@ -2086,7 +2130,7 @@ mod tests { let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); - let inst = timeout( + let (inst, _) = timeout( TIMEOUT_DURATION, instance_struct( &log, @@ -2111,20 +2155,24 @@ mod tests { .expect("failed to send Instance::put_state"); // Timeout our future waiting for the instance-state-change at - // `TIMEOUT_DURATION`, which should fail because zone boot will take - // twice that by construction. + // `TIMEOUT_DURATION` let timeout_fut = timeout(TIMEOUT_DURATION, put_rx); - // And advance time by twice that, so that the actual - // `MockZones::boot()` call should be exercised (or will be soon). + // We then step time by twice that in a single jump, in an attempt to + // cause `timeout_fut` to complete immediately, but without letting the + // zone actually boot. There's nothing really ensure things AFAIK, and + // no actual way to do that. The expectation above is synchronous, which + // means we cannot put an await point in it, and so have no way to cause + // that to wait for something we do here, at least without blocking the + // entire and only thread. tokio::time::advance(TIMEOUT_DURATION * 2).await; - tokio::time::resume(); - timeout_fut .await .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?"); + tokio::time::resume(); + if let ReceivedInstanceState::InstancePut(SledInstanceState { vmm_state: VmmRuntimeState { state: VmmState::Running, .. }, .. @@ -2161,6 +2209,12 @@ mod tests { let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); + let (services, mut metrics_rx) = fake_instance_manager_services( + &log, + storage_handle, + nexus_client, + &temp_dir, + ); let InstanceManagerServices { nexus_client, vnic_allocator: _, @@ -2168,12 +2222,8 @@ mod tests { storage, zone_bundler, zone_builder_factory, - } = fake_instance_manager_services( - &log, - storage_handle, - nexus_client, - &temp_dir, - ); + metrics_queue, + } = services; let etherstub = Etherstub("mystub".to_string()); @@ -2188,6 +2238,7 @@ mod tests { zone_bundler, zone_builder_factory, vmm_reservoir_manager, + metrics_queue, ) .unwrap(); @@ -2196,7 +2247,7 @@ mod tests { let propolis_addr = propolis_server.local_addr(); let instance_id = InstanceUuid::new_v4(); - let propolis_id = PropolisUuid::new_v4(); + let propolis_id = PropolisUuid::from_untyped_uuid(PROPOLIS_ID); let InstanceInitialState { hardware, instance_runtime, @@ -2246,6 +2297,24 @@ mod tests { .expect("timed out waiting for InstanceState::Running in FakeNexus") .expect("failed to receive FakeNexus' InstanceState"); + // We should have received exactly one message on the metrics request + // queue, for the control VNIC. The instance has no OPTE ports. + let message = + metrics_rx.try_recv().expect("Should have received a message"); + let zone_name = propolis_zone_name(&propolis_id); + assert_eq!( + message, + metrics::Message::TrackVnic { + zone_name, + name: "oxControlInstance0".into(), + }, + "Expected instance zone to send a message on its metrics \ + request queue, asking to track its control VNIC", + ); + metrics_rx + .try_recv() + .expect_err("The metrics request queue should have one message"); + storage_harness.cleanup().await; logctx.cleanup_successful(); } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index baa377a064..bb9303f5e2 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -6,6 +6,7 @@ use crate::instance::propolis_zone_name; use crate::instance::Instance; +use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClientWithResolver; use crate::params::InstanceExternalIpBody; use crate::params::InstanceMetadata; @@ -79,6 +80,7 @@ pub(crate) struct InstanceManagerServices { pub storage: StorageHandle, pub zone_bundler: ZoneBundler, pub zone_builder_factory: ZoneBuilderFactory, + pub metrics_queue: MetricsRequestQueue, } // Describes the internals of the "InstanceManager", though most of the @@ -108,6 +110,7 @@ impl InstanceManager { zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, vmm_reservoir_manager: VmmReservoirManagerHandle, + metrics_queue: MetricsRequestQueue, ) -> Result { let (tx, rx) = mpsc::channel(QUEUE_SIZE); let (terminate_tx, terminate_rx) = mpsc::unbounded_channel(); @@ -126,6 +129,7 @@ impl InstanceManager { storage, zone_bundler, zone_builder_factory, + metrics_queue, }; let runner_handle = @@ -452,6 +456,7 @@ struct InstanceManagerRunner { storage: StorageHandle, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, + metrics_queue: MetricsRequestQueue, } impl InstanceManagerRunner { @@ -637,6 +642,7 @@ impl InstanceManagerRunner { storage: self.storage.clone(), zone_bundler: self.zone_bundler.clone(), zone_builder_factory: self.zone_builder_factory.clone(), + metrics_queue: self.metrics_queue.clone(), }; let state = crate::instance::InstanceInitialState { diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs index fcb260e93a..20083d8ebd 100644 --- a/sled-agent/src/metrics.rs +++ b/sled-agent/src/metrics.rs @@ -4,124 +4,415 @@ //! Metrics produced by the sled-agent for collection by oximeter. +use illumos_utils::running_zone::RunningZone; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::ProducerKind; -use oximeter::types::MetricsError; -use oximeter::types::ProducerRegistry; -use oximeter_instruments::kstat::link; +use omicron_common::api::internal::shared::SledIdentifiers; +use oximeter_instruments::kstat::link::sled_data_link::SledDataLink; use oximeter_instruments::kstat::CollectionDetails; use oximeter_instruments::kstat::Error as KstatError; use oximeter_instruments::kstat::KstatSampler; use oximeter_instruments::kstat::TargetId; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; -use sled_hardware_types::Baseboard; use slog::Logger; +use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::net::Ipv6Addr; use std::net::SocketAddr; -use std::sync::Arc; -use std::sync::Mutex; use std::time::Duration; +use tokio::sync::mpsc; use uuid::Uuid; /// The interval on which we ask `oximeter` to poll us for metric data. -pub(crate) const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(30); +const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(30); /// The interval on which we sample link metrics. -pub(crate) const LINK_SAMPLE_INTERVAL: Duration = Duration::from_secs(10); +// +// TODO(https://github.com/oxidecomputer/omicron/issues/5695) +// These should probably be sampled much densely. We may want to wait for +// https://github.com/oxidecomputer/omicron/issues/740, which would handle +// pagination between the producer and collector, as sampling at < 1s for many +// links could lead to quite large requests. Or we can eat the memory cost for +// now. +const LINK_SAMPLE_INTERVAL: Duration = Duration::from_secs(10); + +/// The interval after which we expire kstat-based collection of transient +/// links. +/// +/// This applies to VNICs and OPTE ports. Physical links are never expired, +/// since we should never expect them to disappear. While we strive to get +/// notifications before these transient links are deleted, it's always possible +/// we miss that, and so the data collection fails. If that fails for more than +/// this interval, we stop attempting to collect its data. +const TRANSIENT_LINK_EXPIRATION_INTERVAL: Duration = Duration::from_secs(60); /// The maximum Dropshot request size for the metrics server. const METRIC_REQUEST_MAX_SIZE: usize = 10 * 1024 * 1024; +/// Size of the queue used to send messages to the metrics task. +const QUEUE_SIZE: usize = 64; + /// An error during sled-agent metric production. #[derive(Debug, thiserror::Error)] pub enum Error { #[error("Kstat-based metric failure")] Kstat(#[source] KstatError), - #[error("Failed to insert metric producer into registry")] - Registry(#[source] MetricsError), + #[error("Failed to start metric producer server")] + ProducerServer(#[source] oximeter_producer::Error), +} - #[error("Failed to fetch hostname")] - Hostname(#[source] std::io::Error), +/// Messages sent to the sled-agent metrics collection task. +/// +/// The sled-agent publish metrics to Oximeter, including statistics about +/// datalinks. This metrics task runs in the background, and code that creates +/// or deletes objects can notify this task to start / stop tracking statistics +/// for them. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(test, allow(dead_code))] +pub(crate) enum Message { + /// Start tracking the named physical link. + /// + /// This is only use on startup, to track the underlays. + TrackPhysical { zone_name: String, name: String }, + /// Track the named VNIC. + TrackVnic { zone_name: String, name: String }, + /// Stop tracking the named VNIC. + UntrackVnic { name: String }, + /// Track the named OPTE port. + TrackOptePort { zone_name: String, name: String }, + /// Stop tracking the named OPTE port. + UntrackOptePort { name: String }, + // TODO-completeness: We will probably want to track other kinds of + // statistics here too. For example, we could send messages when a zone is + // created / destroyed to track zonestats; we might also want to support + // passing an explicit `oximeter::Producer` type in, so that other code + // could attach their own producers. +} - #[error("Non-UTF8 hostname")] - NonUtf8Hostname, +/// Helper to define kinds of tracked links. +struct LinkKind; - #[error("Missing NULL byte in hostname")] - HostnameMissingNull, +impl LinkKind { + const PHYSICAL: &'static str = "physical"; + const VNIC: &'static str = "vnic"; + const OPTE: &'static str = "opte"; +} - #[error("Failed to start metric producer server")] - ProducerServer(#[source] oximeter_producer::Error), +/// The main task used to collect and publish sled-agent metrics. +async fn metrics_task( + sled_identifiers: SledIdentifiers, + kstat_sampler: KstatSampler, + _server: ProducerServer, + log: Logger, + mut rx: mpsc::Receiver, +) { + let mut tracked_links: BTreeMap = BTreeMap::new(); + + // Main polling loop, waiting for messages from other pieces of the code to + // track various statistics. + loop { + let Some(message) = rx.recv().await else { + error!(log, "channel closed, exiting"); + return; + }; + trace!(log, "received message"; "message" => ?message); + match message { + Message::TrackPhysical { zone_name, name } => { + let link = SledDataLink { + kind: LinkKind::PHYSICAL.into(), + link_name: name.into(), + rack_id: sled_identifiers.rack_id, + sled_id: sled_identifiers.sled_id, + sled_model: sled_identifiers.model.clone().into(), + sled_revision: sled_identifiers.revision, + sled_serial: sled_identifiers.serial.clone().into(), + zone_name: zone_name.into(), + }; + add_datalink(&log, &mut tracked_links, &kstat_sampler, link) + .await; + } + Message::TrackVnic { zone_name, name } => { + let link = SledDataLink { + kind: LinkKind::VNIC.into(), + link_name: name.into(), + rack_id: sled_identifiers.rack_id, + sled_id: sled_identifiers.sled_id, + sled_model: sled_identifiers.model.clone().into(), + sled_revision: sled_identifiers.revision, + sled_serial: sled_identifiers.serial.clone().into(), + zone_name: zone_name.into(), + }; + add_datalink(&log, &mut tracked_links, &kstat_sampler, link) + .await; + } + Message::UntrackVnic { name } => { + remove_datalink(&log, &mut tracked_links, &kstat_sampler, name) + .await + } + Message::TrackOptePort { zone_name, name } => { + let link = SledDataLink { + kind: LinkKind::OPTE.into(), + link_name: name.into(), + rack_id: sled_identifiers.rack_id, + sled_id: sled_identifiers.sled_id, + sled_model: sled_identifiers.model.clone().into(), + sled_revision: sled_identifiers.revision, + sled_serial: sled_identifiers.serial.clone().into(), + zone_name: zone_name.into(), + }; + add_datalink(&log, &mut tracked_links, &kstat_sampler, link) + .await; + } + Message::UntrackOptePort { name } => { + remove_datalink(&log, &mut tracked_links, &kstat_sampler, name) + .await + } + } + } } -// Basic metadata about the sled agent used when publishing metrics. -#[derive(Clone, Debug)] -struct SledIdentifiers { - sled_id: Uuid, - rack_id: Uuid, - baseboard: Baseboard, +/// Stop tracking a link by name. +async fn remove_datalink( + log: &Logger, + tracked_links: &mut BTreeMap, + kstat_sampler: &KstatSampler, + name: String, +) { + match tracked_links.remove(&name) { + Some(id) => match kstat_sampler.remove_target(id).await { + Ok(_) => { + debug!( + log, + "Removed VNIC from tracked links"; + "link_name" => name, + ); + } + Err(err) => { + error!( + log, + "Failed to remove VNIC from kstat sampler, \ + metrics may still be produced for it"; + "link_name" => name, + "error" => ?err, + ); + } + }, + None => { + debug!( + log, + "received message to delete VNIC, but \ + it is not in the list of tracked links"; + "link_name" => name, + ); + } + } } -/// Type managing all oximeter metrics produced by the sled-agent. -// -// TODO-completeness: We probably want to get kstats or other metrics in to this -// type from other parts of the code, possibly before the `SledAgent` itself -// exists. This is similar to the storage resources or other objects, most of -// which are essentially an `Arc>`. It would be nice to avoid that -// pattern, but until we have more statistics, it's not clear whether that's -// worth it right now. -#[derive(Clone)] +/// Start tracking a new link of the specified kind. +async fn add_datalink( + log: &Logger, + tracked_links: &mut BTreeMap, + kstat_sampler: &KstatSampler, + link: SledDataLink, +) { + match tracked_links.entry(link.link_name.to_string()) { + Entry::Vacant(entry) => { + let details = if is_transient_link(&link.kind) { + CollectionDetails::duration( + LINK_SAMPLE_INTERVAL, + TRANSIENT_LINK_EXPIRATION_INTERVAL, + ) + } else { + CollectionDetails::never(LINK_SAMPLE_INTERVAL) + }; + let kind = link.kind.clone(); + let zone_name = link.zone_name.clone(); + match kstat_sampler.add_target(link, details).await { + Ok(id) => { + debug!( + log, + "Added new link to kstat sampler"; + "link_name" => entry.key(), + "link_kind" => %kind, + "zone_name" => %zone_name, + ); + entry.insert(id); + } + Err(err) => { + error!( + log, + "Failed to add VNIC to kstat sampler, \ + no metrics will be collected for it"; + "link_name" => entry.key(), + "link_kind" => %kind, + "zone_name" => %zone_name, + "error" => ?err, + ); + } + } + } + Entry::Occupied(entry) => { + debug!( + log, + "received message to track VNIC, \ + but it is already being tracked"; + "link_name" => entry.key(), + ); + } + } +} + +/// Return true if this is considered a transient link, from the perspective of +/// its expiration behavior. +fn is_transient_link(kind: &str) -> bool { + kind == LinkKind::VNIC || kind == LinkKind::OPTE +} + +/// Manages sled-based metrics reported to Oximeter. +/// +/// This object is used to sample kernel statistics and produce other Oximeter +/// metrics for the sled agent. It runs a small background task responsible for +/// actually generating / reporting samples. Users operate with it through the +/// `MetricsHandle`. +#[derive(Debug)] pub struct MetricsManager { - metadata: Arc, - _log: Logger, - kstat_sampler: KstatSampler, - // TODO-scalability: We may want to generalize this to store any kind of - // tracked target, and use a naming scheme that allows us pick out which - // target we're interested in from the arguments. - // - // For example, we can use the link name to do this, for any physical or - // virtual link, because they need to be unique. We could also do the same - // for disks or memory. If we wanted to guarantee uniqueness, we could - // namespace them internally, e.g., `"datalink:{link_name}"` would be the - // real key. - tracked_links: Arc>>, - producer_server: Arc, + /// Receive-side of a channel used to pass the background task messages. + #[cfg_attr(test, allow(dead_code))] + tx: mpsc::Sender, + /// The background task itself. + _task: tokio::task::JoinHandle<()>, } impl MetricsManager { /// Construct a new metrics manager. - /// - /// This takes a few key pieces of identifying information that are used - /// when reporting sled-specific metrics. pub fn new( - sled_id: Uuid, - rack_id: Uuid, - baseboard: Baseboard, - sled_address: Ipv6Addr, - log: Logger, + log: &Logger, + identifiers: SledIdentifiers, + address: Ipv6Addr, ) -> Result { - let producer_server = - start_producer_server(&log, sled_id, sled_address)?; - let kstat_sampler = KstatSampler::new(&log).map_err(Error::Kstat)?; - producer_server + let sampler = KstatSampler::new(log).map_err(Error::Kstat)?; + let server = start_producer_server(&log, identifiers.sled_id, address)?; + server .registry() - .register_producer(kstat_sampler.clone()) - .map_err(Error::Registry)?; - let tracked_links = Arc::new(Mutex::new(BTreeMap::new())); - Ok(Self { - metadata: Arc::new(SledIdentifiers { sled_id, rack_id, baseboard }), - _log: log, - kstat_sampler, - tracked_links, - producer_server, - }) + .register_producer(sampler.clone()) + .expect("actually infallible"); + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + let task_log = log.new(o!("component" => "metrics-task")); + let _task = tokio::task::spawn(metrics_task( + identifiers, + sampler, + server, + task_log, + rx, + )); + Ok(Self { tx, _task }) } - /// Return a reference to the contained producer registry. - pub fn registry(&self) -> &ProducerRegistry { - self.producer_server.registry() + /// Return a queue that can be used to send requests to the metrics task. + pub fn request_queue(&self) -> MetricsRequestQueue { + MetricsRequestQueue(self.tx.clone()) + } +} + +/// A cheap handle used to send requests to the metrics task. +#[derive(Clone, Debug)] +pub struct MetricsRequestQueue(mpsc::Sender); + +impl MetricsRequestQueue { + #[cfg(test)] + #[allow(dead_code)] + /// Return both halves of the queue used to send messages to the collection + /// task, for use in testing. + pub(crate) fn for_test() -> (Self, mpsc::Receiver) { + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + (Self(tx), rx) + } + + /// Ask the task to start tracking the named physical datalink. + pub async fn track_physical( + &self, + zone_name: impl Into, + name: impl Into, + ) { + let _ = self + .0 + .send(Message::TrackPhysical { + zone_name: zone_name.into(), + name: name.into(), + }) + .await; + } + + /// Ask the task to start tracking the named VNIC. + pub async fn track_vnic( + &self, + zone_name: impl Into, + name: impl Into, + ) { + let _ = self + .0 + .send(Message::TrackVnic { + zone_name: zone_name.into(), + name: name.into(), + }) + .await; + } + + /// Ask the task to stop tracking the named VNIC. + pub async fn untrack_vnic(&self, name: impl Into) { + let _ = self.0.send(Message::UntrackVnic { name: name.into() }).await; + } + + /// Ask the task to start tracking the named OPTE port. + pub async fn track_opte_port( + &self, + zone_name: impl Into, + name: impl Into, + ) { + let _ = self + .0 + .send(Message::TrackOptePort { + zone_name: zone_name.into(), + name: name.into(), + }) + .await; + } + + /// Ask the task to stop tracking the named OPTE port. + pub async fn untrack_opte_port(&self, name: impl Into) { + let _ = + self.0.send(Message::UntrackOptePort { name: name.into() }).await; + } + + /// Track all datalinks in a zone. + /// + /// This will collect and track: + /// + /// - The bootstrap VNIC, if it exists. + /// - The underlay control VNIC, which always exists. + /// - Any OPTE ports, which only exist for those with external connectivity. + pub async fn track_zone_links(&self, running_zone: &RunningZone) { + let zone_name = running_zone.name(); + self.track_vnic(zone_name, running_zone.control_vnic_name()).await; + if let Some(bootstrap_vnic) = running_zone.bootstrap_vnic_name() { + self.track_vnic(zone_name, bootstrap_vnic).await; + } + for port in running_zone.opte_port_names() { + self.track_opte_port(zone_name, port).await; + } + } + + /// Stop tracking all datalinks in a zone. + pub async fn untrack_zone_links(&self, running_zone: &RunningZone) { + self.untrack_vnic(running_zone.control_vnic_name()).await; + if let Some(bootstrap_vnic) = running_zone.bootstrap_vnic_name() { + self.untrack_vnic(bootstrap_vnic).await; + } + for port in running_zone.opte_port_names() { + self.untrack_opte_port(port).await; + } } } @@ -130,9 +421,8 @@ fn start_producer_server( log: &Logger, sled_id: Uuid, sled_address: Ipv6Addr, -) -> Result, Error> { +) -> Result { let log = log.new(slog::o!("component" => "producer-server")); - let registry = ProducerRegistry::with_id(sled_id); // Listen on any available socket, using our underlay address. let address = SocketAddr::new(sled_address.into(), 0); @@ -141,7 +431,7 @@ fn start_producer_server( let registration_address = None; let config = oximeter_producer::Config { server_info: ProducerEndpoint { - id: registry.producer_id(), + id: sled_id, kind: ProducerKind::SledAgent, address, interval: METRIC_COLLECTION_INTERVAL, @@ -150,84 +440,5 @@ fn start_producer_server( request_body_max_bytes: METRIC_REQUEST_MAX_SIZE, log: LogConfig::Logger(log), }; - ProducerServer::start(&config).map(Arc::new).map_err(Error::ProducerServer) -} - -impl MetricsManager { - /// Track metrics for a physical datalink. - pub async fn track_physical_link( - &self, - link_name: impl AsRef, - interval: Duration, - ) -> Result<(), Error> { - let hostname = hostname()?; - let link = link::physical_data_link::PhysicalDataLink { - rack_id: self.metadata.rack_id, - sled_id: self.metadata.sled_id, - serial: self.serial_number().into(), - hostname: hostname.into(), - link_name: link_name.as_ref().to_string().into(), - }; - let details = CollectionDetails::never(interval); - let id = self - .kstat_sampler - .add_target(link, details) - .await - .map_err(Error::Kstat)?; - self.tracked_links - .lock() - .unwrap() - .insert(link_name.as_ref().to_string(), id); - Ok(()) - } - - /// Stop tracking metrics for a datalink. - /// - /// This works for both physical and virtual links. - #[allow(dead_code)] - pub async fn stop_tracking_link( - &self, - link_name: impl AsRef, - ) -> Result<(), Error> { - let maybe_id = - self.tracked_links.lock().unwrap().remove(link_name.as_ref()); - if let Some(id) = maybe_id { - self.kstat_sampler.remove_target(id).await.map_err(Error::Kstat) - } else { - Ok(()) - } - } - - // Return the serial number out of the baseboard, if one exists. - fn serial_number(&self) -> String { - match &self.metadata.baseboard { - Baseboard::Gimlet { identifier, .. } => identifier.clone(), - Baseboard::Unknown => String::from("unknown"), - Baseboard::Pc { identifier, .. } => identifier.clone(), - } - } -} - -// Return the current hostname if possible. -fn hostname() -> Result { - // See netdb.h - const MAX_LEN: usize = 256; - let mut out = vec![0u8; MAX_LEN + 1]; - if unsafe { - libc::gethostname(out.as_mut_ptr() as *mut libc::c_char, MAX_LEN) - } == 0 - { - // Split into subslices by NULL bytes. - // - // We should have a NULL byte, since we've asked for no more than 255 - // bytes in a 256 byte buffer, but you never know. - let Some(chunk) = out.split(|x| *x == 0).next() else { - return Err(Error::HostnameMissingNull); - }; - let s = std::ffi::CString::new(chunk) - .map_err(|_| Error::NonUtf8Hostname)?; - s.into_string().map_err(|_| Error::NonUtf8Hostname) - } else { - Err(std::io::Error::last_os_error()).map_err(|_| Error::NonUtf8Hostname) - } + ProducerServer::start(&config).map_err(Error::ProducerServer) } diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 9451484f21..ffa94cebcb 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -1,3 +1,4 @@ +use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClientWithResolver; use anyhow::{anyhow, Result}; use illumos_utils::dladm::Etherstub; @@ -59,6 +60,7 @@ pub(crate) struct ProbeManagerInner { vnic_allocator: VnicAllocator, storage: StorageHandle, port_manager: PortManager, + metrics_queue: MetricsRequestQueue, running_probes: Mutex, } @@ -69,6 +71,7 @@ impl ProbeManager { etherstub: Etherstub, storage: StorageHandle, port_manager: PortManager, + metrics_queue: MetricsRequestQueue, log: Logger, ) -> Self { Self { @@ -87,6 +90,7 @@ impl ProbeManager { sled_id, storage, port_manager, + metrics_queue, }), } } @@ -345,6 +349,10 @@ impl ProbeManagerInner { rz.ensure_address_for_port("overlay", 0).await?; info!(self.log, "started probe {}", probe.id); + // Notify the sled-agent's metrics task to start tracking the VNIC and + // any OPTE ports in the zone. + self.metrics_queue.track_zone_links(&rz).await; + self.running_probes.lock().await.zones.insert(probe.id, rz); Ok(()) @@ -375,12 +383,19 @@ impl ProbeManagerInner { ) { match probes.zones.remove(&id) { Some(mut running_zone) => { + // TODO-correctness: There are no physical links in the zone, is + // this intended to delete the control VNIC? for l in running_zone.links_mut() { if let Err(e) = l.delete() { error!(self.log, "delete probe link {}: {e}", l.name()); } } + + // Ask the sled-agent to stop tracking our datalinks, and then + // delete the OPTE ports. + self.metrics_queue.untrack_zone_links(&running_zone).await; running_zone.release_opte_ports(); + if let Err(e) = running_zone.stop().await { error!(self.log, "stop probe: {e}") } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 42466ce094..b79aa517a0 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -30,6 +30,7 @@ use crate::bootstrap::early_networking::{ }; use crate::bootstrap::BootstrapNetworking; use crate::config::SidecarRevision; +use crate::metrics::MetricsRequestQueue; use crate::params::{ DendriteAsic, OmicronZoneConfigExt, OmicronZoneTypeExt, TimeSync, ZoneBundleCause, ZoneBundleMetadata, @@ -678,6 +679,7 @@ struct SledAgentInfo { underlay_address: Ipv6Addr, rack_id: Uuid, rack_network_config: Option, + metrics_queue: MetricsRequestQueue, } pub(crate) enum TimeSyncConfig { @@ -897,13 +899,14 @@ impl ServiceManager { /// Sets up "Sled Agent" information, including underlay info. /// /// Any subsequent calls after the first invocation return an error. - pub fn sled_agent_started( + pub async fn sled_agent_started( &self, config: Config, port_manager: PortManager, underlay_address: Ipv6Addr, rack_id: Uuid, rack_network_config: Option, + metrics_queue: MetricsRequestQueue, ) -> Result<(), Error> { info!(&self.inner.log, "sled agent started"; "underlay_address" => underlay_address.to_string()); self.inner @@ -918,10 +921,21 @@ impl ServiceManager { underlay_address, rack_id, rack_network_config, + metrics_queue: metrics_queue.clone(), }) .map_err(|_| "already set".to_string()) .expect("Sled Agent should only start once"); + // At this point, we've already started up the switch zone, but the + // VNICs inside it cannot have been tracked by the sled-agent's metrics + // task (the sled-agent didn't exist at the time we started the switch + // zone!). Notify that task about the zone's VNICs now. + if let SwitchZoneState::Running { zone, .. } = + &*self.inner.switch_zone.lock().await + { + metrics_queue.track_zone_links(zone).await; + } + Ok(()) } @@ -1510,7 +1524,7 @@ impl ServiceManager { ServiceBuilder::new("network/dns/client") .add_instance(ServiceInstanceBuilder::new("default")); - match &request { + let running_zone = match &request { ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: OmicronZoneConfig { @@ -1561,7 +1575,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup clickhouse profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1617,7 +1631,7 @@ impl ServiceManager { err, ) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1691,7 +1705,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup CRDB profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1749,7 +1763,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup crucible profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1797,8 +1811,7 @@ impl ServiceManager { .add_to_zone(&self.inner.log, &installed_zone) .await .map_err(|err| Error::io("crucible pantry profile", err))?; - let running_zone = RunningZone::boot(installed_zone).await?; - return Ok(running_zone); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -1846,7 +1859,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Oximeter profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -1912,7 +1925,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup External DNS profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -2023,7 +2036,7 @@ impl ServiceManager { Error::io("Failed to set up NTP profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -2103,7 +2116,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Internal DNS profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: @@ -2251,7 +2264,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Nexus profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } ZoneArgs::Switch(SwitchZoneConfigLocal { zone: SwitchZoneConfig { id, services, addresses }, @@ -2950,9 +2963,27 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Switch zone profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + RunningZone::boot(installed_zone).await? } + }; + + // Now that we've booted the zone, we'll notify the sled-agent about: + // + // - Its control VNIC (all zones have one) + // - Any bootstrap network VNIC (only the switch zone has one) + // - Any OPTE ports (instance zones, or Oxide zones with external + // connectivity). + // + // Note that we'll almost always have started the sled-agent at this + // point. The only exception is the switch zone, during bootstrapping + // but before we've either run RSS or unlocked the rack. In both those + // cases, we have a `StartSledAgentRequest`, and so a metrics queue. + if let Some(queue) = + self.inner.sled_info.get().map(|sa| &sa.metrics_queue) + { + queue.track_zone_links(&running_zone).await; } + Ok(running_zone) } // Ensures that a single Omicron zone is running. @@ -3317,6 +3348,13 @@ impl ServiceManager { ); return; }; + // Ensure that the sled agent's metrics task is not tracking the zone's + // VNICs or OPTE ports. + if let Some(queue) = + self.inner.sled_info.get().map(|sa| &sa.metrics_queue) + { + queue.untrack_zone_links(&zone.runtime).await; + } debug!( log, "removing an existing zone"; @@ -3354,6 +3392,12 @@ impl ServiceManager { "zone" => &zone_name, "state" => ?zone.state(), ); + // NOTE: We might want to tell the sled-agent's metrics task to + // stop tracking any links in this zone. However, we don't have + // very easy access to them, without running a command in the + // zone. These links are about to be deleted, and the metrics + // task will expire them after a while anyway, but it might be + // worth the trouble to do that in the future. if let Err(e) = Zones::halt_and_remove_logged(&self.inner.log, &zone_name) .await @@ -4237,6 +4281,15 @@ impl ServiceManager { } (SwitchZoneState::Running { zone, .. }, None) => { info!(log, "Disabling {zone_typestr} zone (was running)"); + + // Notify the sled-agent's metrics task to stop collecting from + // the VNICs in the zone (if the agent exists). + if let Some(queue) = + self.inner.sled_info.get().map(|sa| &sa.metrics_queue) + { + queue.untrack_zone_links(zone).await; + } + let _ = zone.stop().await; *sled_zone = SwitchZoneState::Disabled; } @@ -4309,6 +4362,8 @@ impl ServiceManager { #[cfg(all(test, target_os = "illumos"))] mod test { + use crate::metrics; + use super::*; use illumos_utils::{ dladm::{ @@ -4320,8 +4375,12 @@ mod test { }; use sled_storage::manager_test_harness::StorageManagerTestHarness; - use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; + use std::{ + net::{Ipv6Addr, SocketAddrV6}, + time::Duration, + }; + use tokio::sync::mpsc::error::TryRecvError; use uuid::Uuid; // Just placeholders. Not used. @@ -4331,6 +4390,10 @@ mod test { const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_ntp"; const EXPECTED_PORT: u16 = 12223; + // Timeout within which we must have received a message about a zone's links + // to track. This is very generous. + const LINK_NOTIFICATION_TIMEOUT: Duration = Duration::from_secs(5); + fn make_bootstrap_networking_config() -> BootstrapNetworking { BootstrapNetworking { bootstrap_etherstub: Etherstub( @@ -4596,7 +4659,7 @@ mod test { // // This will shut down all allocated zones, and delete their // associated VNICs. - fn drop_service_manager(mgr: ServiceManager) { + async fn drop_service_manager(mgr: ServiceManager) { let halt_ctx = MockZones::halt_and_remove_logged_context(); halt_ctx.expect().returning(|_, name| { assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); @@ -4605,6 +4668,16 @@ mod test { let delete_vnic_ctx = MockDladm::delete_vnic_context(); delete_vnic_ctx.expect().returning(|_| Ok(())); + // Also send a message to the metrics task that the VNIC has been + // deleted. + mgr.inner + .sled_info + .get() + .unwrap() + .metrics_queue + .untrack_vnic("oxControlService0") + .await; + // Explicitly drop the service manager drop(mgr); } @@ -4715,10 +4788,11 @@ mod test { mgr } - fn sled_agent_started( + async fn sled_agent_started( log: &slog::Logger, test_config: &TestConfig, mgr: &ServiceManager, + metrics_queue: MetricsRequestQueue, ) { let port_manager = PortManager::new( log.new(o!("component" => "PortManager")), @@ -4733,7 +4807,9 @@ mod test { Ipv6Addr::LOCALHOST, Uuid::new_v4(), None, + metrics_queue, ) + .await .unwrap(); } } @@ -4746,7 +4822,14 @@ mod test { let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v1 = Generation::new(); let found = @@ -4770,7 +4853,26 @@ mod test { assert_eq!(found.zones.len(), 1); assert_eq!(found.zones[0].id, id); - drop_service_manager(mgr); + // Check that we received a message about the zone's VNIC. + let message = tokio::time::timeout( + LINK_NOTIFICATION_TIMEOUT, + metrics_rx.recv(), + ) + .await + .expect( + "Should have received a message about the zone's VNIC within the timeout" + ) + .expect("Should have received a message about the zone's VNIC"); + assert_eq!( + message, + metrics::Message::TrackVnic { + zone_name: "global".into(), + name: "oxControlService0".into() + }, + ); + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4787,7 +4889,14 @@ mod test { let mgr = helper.new_service_manager_with_timesync(TimeSyncConfig::Fail); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v1 = Generation::new(); let found = @@ -4822,6 +4931,10 @@ mod test { Error::TimeNotSynchronized ); + // Ensure we have _not_ received a message about the zone's VNIC, + // because there isn't a zone. + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + // Next, ensure this still converts to an "unavail" common error let common_err = omicron_common::api::external::Error::from(err); assert_matches::assert_matches!( @@ -4846,7 +4959,7 @@ mod test { .await .unwrap(); - drop_service_manager(mgr); + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); } @@ -4860,7 +4973,14 @@ mod test { let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v2 = Generation::new().next(); let id = Uuid::new_v4(); @@ -4874,7 +4994,28 @@ mod test { assert_eq!(found.zones.len(), 1); assert_eq!(found.zones[0].id, id); - drop_service_manager(mgr); + // In this case, the manager creates the zone once, and then "ensuring" + // it a second time is a no-op. So we simply expect the same message + // sequence as starting a zone for the first time. + let message = tokio::time::timeout( + LINK_NOTIFICATION_TIMEOUT, + metrics_rx.recv(), + ) + .await + .expect( + "Should have received a message about the zone's VNIC within the timeout" + ) + .expect("Should have received a message about the zone's VNIC"); + assert_eq!( + message, + metrics::Message::TrackVnic { + zone_name: "global".into(), + name: "oxControlService0".into() + }, + ); + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4892,7 +5033,14 @@ mod test { // First, spin up a ServiceManager, create a new zone, and then tear // down the ServiceManager. let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; let v2 = Generation::new().next(); let id = Uuid::new_v4(); @@ -4903,13 +5051,47 @@ mod test { String::from(test_config.config_dir.path().as_str()), ) .await; - drop_service_manager(mgr); + drop_service_manager(mgr).await; + + // Check that we received a message about the zone's VNIC. Since the + // manager is being dropped, it should also send a message about the + // VNIC being deleted. + let zone_name = format!("oxz_ntp_{}", id); + for expected_message in [ + metrics::Message::TrackVnic { + zone_name, + name: "oxControlService0".into(), + }, + metrics::Message::UntrackVnic { name: "oxControlService0".into() }, + ] { + println!("Expecting message from manager: {expected_message:#?}"); + let message = tokio::time::timeout( + LINK_NOTIFICATION_TIMEOUT, + metrics_rx.recv(), + ) + .await + .expect( + "Should have received a message about the zone's VNIC within the timeout" + ) + .expect("Should have received a message about the zone's VNIC"); + assert_eq!(message, expected_message,); + } + // Note that the manager has been dropped, so we should get + // disconnected, not empty. + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Disconnected)); // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. let _expectations = expect_new_service(EXPECTED_ZONE_NAME_PREFIX); let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_queue, + ) + .await; illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let found = @@ -4918,7 +5100,13 @@ mod test { assert_eq!(found.zones.len(), 1); assert_eq!(found.zones[0].id, id); - drop_service_manager(mgr); + // Note that the `omicron_zones_list()` request just returns the + // configured zones, stored in the on-disk ledger. There is nothing + // above that actually ensures that those zones exist, as far as I can + // tell! + assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); + + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4936,7 +5124,14 @@ mod test { // First, spin up a ServiceManager, create a new zone, and then tear // down the ServiceManager. let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let metrics_handles = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_handles.0.clone(), + ) + .await; let v1 = Generation::new(); let v2 = v1.next(); @@ -4948,7 +5143,7 @@ mod test { String::from(test_config.config_dir.path().as_str()), ) .await; - drop_service_manager(mgr); + drop_service_manager(mgr).await; // Next, delete the ledger. This means the zone we just created will not // be remembered on the next initialization. @@ -4959,14 +5154,21 @@ mod test { // Observe that the old service is not re-initialized. let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let metrics_handles = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_handles.0.clone(), + ) + .await; let found = mgr.omicron_zones_list().await.expect("failed to list zones"); assert_eq!(found.generation, v1); assert!(found.zones.is_empty()); - drop_service_manager(mgr); + drop_service_manager(mgr).await; helper.cleanup().await; logctx.cleanup_successful(); @@ -4981,7 +5183,14 @@ mod test { let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); - LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + let metrics_handles = MetricsRequestQueue::for_test(); + LedgerTestHelper::sled_agent_started( + &logctx.log, + &test_config, + &mgr, + metrics_handles.0.clone(), + ) + .await; // Like the normal tests, set up a generation with one zone in it. let v1 = Generation::new(); @@ -5086,7 +5295,7 @@ mod test { found_zones.sort_by(|a, b| a.id.cmp(&b.id)); assert_eq!(our_zones, found_zones); - drop_service_manager(mgr); + drop_service_manager(mgr).await; illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); helper.cleanup().await; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 92dcfd2dfe..8fa18b0a63 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -63,7 +63,6 @@ use omicron_common::backoff::{ use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; -use oximeter::types::ProducerRegistry; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; @@ -336,7 +335,7 @@ struct SledAgentInner { bootstore: bootstore::NodeHandle, // Object handling production of metrics for oximeter. - metrics_manager: MetricsManager, + _metrics_manager: MetricsManager, // Handle to the traffic manager for writing OS updates to our boot disks. boot_disk_os_writer: BootDiskOsWriter, @@ -434,38 +433,23 @@ impl SledAgent { illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics)?; // Start collecting metric data. - // - // First, we're creating a shareable type for managing the metrics - // themselves early on, so that we can pass it to other components of - // the sled agent that need it. - // - // Then we'll start tracking physical links and register as a producer - // with Nexus in the background. - let metrics_manager = MetricsManager::new( - request.body.id, - request.body.rack_id, - long_running_task_handles.hardware_manager.baseboard(), - *sled_address.ip(), - log.new(o!("component" => "MetricsManager")), - )?; + let baseboard = long_running_task_handles.hardware_manager.baseboard(); + let identifiers = SledIdentifiers { + rack_id: request.body.rack_id, + sled_id: request.body.id, + model: baseboard.model().to_string(), + revision: baseboard.revision(), + serial: baseboard.identifier().to_string(), + }; + let metrics_manager = + MetricsManager::new(&log, identifiers, *sled_address.ip())?; // Start tracking the underlay physical links. - for nic in underlay::find_nics(&config.data_links)? { - let link_name = nic.interface(); - if let Err(e) = metrics_manager - .track_physical_link( - link_name, - crate::metrics::LINK_SAMPLE_INTERVAL, - ) - .await - { - error!( - log, - "failed to start tracking physical link metrics"; - "link_name" => link_name, - "error" => ?e, - ); - } + for link in underlay::find_chelsio_links(&config.data_links)? { + metrics_manager + .request_queue() + .track_physical("global", &link.0) + .await; } // Create the PortManager to manage all the OPTE ports on the sled. @@ -496,6 +480,7 @@ impl SledAgent { long_running_task_handles.zone_bundler.clone(), ZoneBuilderFactory::default(), vmm_reservoir_manager.clone(), + metrics_manager.request_queue(), )?; let update_config = ConfigUpdates { @@ -551,13 +536,16 @@ impl SledAgent { network config from bootstore", ); - services.sled_agent_started( - svc_config, - port_manager.clone(), - *sled_address.ip(), - request.body.rack_id, - rack_network_config.clone(), - )?; + services + .sled_agent_started( + svc_config, + port_manager.clone(), + *sled_address.ip(), + request.body.rack_id, + rack_network_config.clone(), + metrics_manager.request_queue(), + ) + .await?; // Spawn a background task for managing notifications to nexus // about this sled-agent. @@ -581,6 +569,7 @@ impl SledAgent { etherstub.clone(), storage_manager.clone(), port_manager.clone(), + metrics_manager.request_queue(), log.new(o!("component" => "ProbeManager")), ); @@ -604,7 +593,7 @@ impl SledAgent { rack_network_config, zone_bundler: long_running_task_handles.zone_bundler.clone(), bootstore: long_running_task_handles.bootstore.clone(), - metrics_manager, + _metrics_manager: metrics_manager, boot_disk_os_writer: BootDiskOsWriter::new(&parent_log), }), log: log.clone(), @@ -1193,11 +1182,6 @@ impl SledAgent { self.inner.port_manager.vpc_routes_ensure(routes).map_err(Error::from) } - /// Return the metric producer registry. - pub fn metrics_registry(&self) -> &ProducerRegistry { - self.inner.metrics_manager.registry() - } - pub(crate) fn storage(&self) -> &StorageHandle { &self.inner.storage } diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 90f5339e84..071cf496bb 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -100,7 +100,7 @@ bgp = [] # You can configure multiple uplinks by repeating the following stanza [[rack_network_config.ports]] # Routes associated with this port. -routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] +routes = [{nexthop = "192.168.1.1", destination = "0.0.0.0/0"}] # Addresses associated with this port. addresses = [{address = "192.168.1.30/24"}] # Name of the uplink port. This should always be "qsfp0" when using softnpu. diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml index 77ca52a647..42068de0f6 100644 --- a/smf/sled-agent/non-gimlet/config.toml +++ b/smf/sled-agent/non-gimlet/config.toml @@ -84,7 +84,7 @@ data_links = ["net0", "net1"] request_body_max_bytes = 2_147_483_648 [log] -level = "info" +level = "debug" mode = "file" path = "/dev/stdout" if_exists = "append"