diff --git a/Cargo.lock b/Cargo.lock index 0a097336930..a6ce6ccb117 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5891,7 +5891,6 @@ dependencies = [ "omicron-workspace-hack", "oximeter 0.1.0", "rand 0.8.5", - "serde", "slog", "slog-async", "slog-term", diff --git a/oximeter/instruments/Cargo.toml b/oximeter/instruments/Cargo.toml index 6199f6bfd18..3d3596d4585 100644 --- a/oximeter/instruments/Cargo.toml +++ b/oximeter/instruments/Cargo.toml @@ -9,9 +9,7 @@ chrono.workspace = true dropshot.workspace = true futures.workspace = true http = { workspace = true, optional = true } -kstat-rs = { workspace = true, optional = true } oximeter.workspace = true -serde = { workspace = true, optional = true } slog.workspace = true tokio.workspace = true thiserror.workspace = true @@ -21,9 +19,12 @@ omicron-workspace-hack.workspace = true [features] default = ["http-instruments", "kstat"] http-instruments = ["http"] -kstat = ["kstat-rs", "serde"] +kstat = ["kstat-rs"] [dev-dependencies] rand.workspace = true slog-async.workspace = true slog-term.workspace = true + +[target.'cfg(target_os = "illumos")'.dependencies] +kstat-rs = { workspace = true, optional = true } diff --git a/oximeter/instruments/src/lib.rs b/oximeter/instruments/src/lib.rs index 0b18798683c..d003e717395 100644 --- a/oximeter/instruments/src/lib.rs +++ b/oximeter/instruments/src/lib.rs @@ -9,5 +9,5 @@ #[cfg(feature = "http-instruments")] pub mod http; -#[cfg(feature = "kstat")] +#[cfg(all(feature = "kstat", target_os = "illumos"))] pub mod kstat; diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs index 2cb95bc3f29..0b28ccd645f 100644 --- a/sled-agent/src/metrics.rs +++ b/sled-agent/src/metrics.rs @@ -6,17 +6,24 @@ use oximeter::types::MetricsError; use oximeter::types::ProducerRegistry; -use oximeter_instruments::kstat::link; -use oximeter_instruments::kstat::CollectionDetails; -use oximeter_instruments::kstat::Error as KstatError; -use oximeter_instruments::kstat::KstatSampler; -use oximeter_instruments::kstat::TargetId; use sled_hardware::Baseboard; use slog::Logger; -use std::collections::BTreeMap; use std::time::Duration; use uuid::Uuid; +cfg_if::cfg_if! { + if #[cfg(target_os = "illumos")] { + use oximeter_instruments::kstat::link; + use oximeter_instruments::kstat::CollectionDetails; + use oximeter_instruments::kstat::Error as KstatError; + use oximeter_instruments::kstat::KstatSampler; + use oximeter_instruments::kstat::TargetId; + use std::collections::BTreeMap; + } else { + use anyhow::anyhow; + } +} + /// The interval on which we ask `oximeter` to poll us for metric data. pub(crate) const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(30); @@ -26,9 +33,14 @@ pub(crate) const LINK_SAMPLE_INTERVAL: Duration = Duration::from_secs(10); /// An error during sled-agent metric production. #[derive(Debug, thiserror::Error)] pub enum Error { + #[cfg(target_os = "illumos")] #[error("Kstat-based metric failure")] Kstat(#[source] KstatError), + #[cfg(not(target_os = "illumos"))] + #[error("Kstat-based metric failure")] + Kstat(#[source] anyhow::Error), + #[error("Failed to insert metric producer into registry")] Registry(#[source] MetricsError), @@ -45,11 +57,15 @@ pub enum Error { // pattern, but until we have more statistics, it's not clear whether that's // worth it right now. #[derive(Clone, Debug)] +// NOTE: The ID fields aren't used on non-illumos systems, rather than changing +// the name of fields that are not yet used. +#[cfg_attr(not(target_os = "illumos"), allow(dead_code))] pub struct MetricsManager { sled_id: Uuid, rack_id: Uuid, baseboard: Baseboard, _log: Logger, + #[cfg(target_os = "illumos")] kstat_sampler: KstatSampler, // TODO-scalability: We may want to generalize this to store any kind of // tracked target, and use a naming scheme that allows us pick out which @@ -60,6 +76,7 @@ pub struct MetricsManager { // for disks or memory. If we wanted to guarantee uniqueness, we could // namespace them internally, e.g., `"datalink:{link_name}"` would be the // real key. + #[cfg(target_os = "illumos")] tracked_links: BTreeMap, registry: ProducerRegistry, } @@ -75,18 +92,26 @@ impl MetricsManager { baseboard: Baseboard, log: Logger, ) -> Result { - let kstat_sampler = KstatSampler::new(&log).map_err(Error::Kstat)?; let registry = ProducerRegistry::with_id(sled_id); - registry - .register_producer(kstat_sampler.clone()) - .map_err(Error::Registry)?; + + cfg_if::cfg_if! { + if #[cfg(target_os = "illumos")] { + let kstat_sampler = KstatSampler::new(&log).map_err(Error::Kstat)?; + registry + .register_producer(kstat_sampler.clone()) + .map_err(Error::Registry)?; + let tracked_links = BTreeMap::new(); + } + } Ok(Self { sled_id, rack_id, baseboard, _log: log, + #[cfg(target_os = "illumos")] kstat_sampler, - tracked_links: BTreeMap::new(), + #[cfg(target_os = "illumos")] + tracked_links, registry, }) } @@ -95,7 +120,10 @@ impl MetricsManager { pub fn registry(&self) -> &ProducerRegistry { &self.registry } +} +#[cfg(target_os = "illumos")] +impl MetricsManager { /// Track metrics for a physical datalink. pub async fn track_physical_link( &mut self, @@ -106,7 +134,7 @@ impl MetricsManager { rack_id: self.rack_id, sled_id: self.sled_id, serial: self.serial_number(), - hostname: hostname().await?, + hostname: Self::hostname().await?, link_name: link_name.as_ref().to_string(), }; let details = CollectionDetails::never(interval); @@ -165,13 +193,53 @@ impl MetricsManager { Baseboard::Pc { identifier, .. } => identifier.clone(), } } + + async fn hostname() -> Result { + tokio::process::Command::new("hostname") + .env_clear() + .output() + .await + .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string()) + .map_err(Error::Hostname) + } } -async fn hostname() -> Result { - tokio::process::Command::new("hostname") - .env_clear() - .output() - .await - .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string()) - .map_err(Error::Hostname) +#[cfg(not(target_os = "illumos"))] +impl MetricsManager { + /// Track metrics for a physical datalink. + pub async fn track_physical_link( + &mut self, + _link_name: impl AsRef, + _interval: Duration, + ) -> Result<(), Error> { + Err(Error::Kstat(anyhow!( + "kstat metrics are not supported on this platform" + ))) + } + + /// Stop tracking metrics for a datalink. + /// + /// This works for both physical and virtual links. + #[allow(dead_code)] + pub async fn stop_tracking_link( + &mut self, + _link_name: impl AsRef, + ) -> Result<(), Error> { + Err(Error::Kstat(anyhow!( + "kstat metrics are not supported on this platform" + ))) + } + + /// Track metrics for a virtual datalink. + #[allow(dead_code)] + pub async fn track_virtual_link( + &self, + _link_name: impl AsRef, + _hostname: impl AsRef, + _interval: Duration, + ) -> Result<(), Error> { + Err(Error::Kstat(anyhow!( + "kstat metrics are not supported on this platform" + ))) + } } diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index cdde2fddfeb..fea3cfa5d8b 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -101,7 +101,7 @@ bgp = [] # You can configure multiple uplinks by repeating the following stanza [[rack_network_config.ports]] # Routes associated with this port. -routes = [{nexthop = "192.168.1.1", destination = "0.0.0.0/0"}] +routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. addresses = ["192.168.1.30/32"] # Name of the uplink port. This should always be "qsfp0" when using softnpu. diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml index 3c4df9d45c1..b4cb7e6cffc 100644 --- a/smf/sled-agent/non-gimlet/config.toml +++ b/smf/sled-agent/non-gimlet/config.toml @@ -72,7 +72,7 @@ switch_zone_maghemite_links = ["tfportrear0_0"] data_links = ["net0", "net1"] [log] -level = "debug" +level = "info" mode = "file" path = "/dev/stdout" if_exists = "append"