Skip to content

Commit

Permalink
Remove local config, some cross-OS cfg directives
Browse files Browse the repository at this point in the history
  • Loading branch information
bnaecker committed Oct 26, 2023
1 parent 214c954 commit 5cffc6a
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 26 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions oximeter/instruments/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ chrono.workspace = true
dropshot.workspace = true
futures.workspace = true
http = { workspace = true, optional = true }
kstat-rs = { workspace = true, optional = true }
oximeter.workspace = true
serde = { workspace = true, optional = true }
slog.workspace = true
tokio.workspace = true
thiserror.workspace = true
Expand All @@ -21,9 +19,12 @@ omicron-workspace-hack.workspace = true
[features]
default = ["http-instruments", "kstat"]
http-instruments = ["http"]
kstat = ["kstat-rs", "serde"]
kstat = ["kstat-rs"]

[dev-dependencies]
rand.workspace = true
slog-async.workspace = true
slog-term.workspace = true

[target.'cfg(target_os = "illumos")'.dependencies]
kstat-rs = { workspace = true, optional = true }
2 changes: 1 addition & 1 deletion oximeter/instruments/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@
#[cfg(feature = "http-instruments")]
pub mod http;

#[cfg(feature = "kstat")]
#[cfg(all(feature = "kstat", target_os = "illumos"))]
pub mod kstat;
106 changes: 87 additions & 19 deletions sled-agent/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,24 @@
use oximeter::types::MetricsError;
use oximeter::types::ProducerRegistry;
use oximeter_instruments::kstat::link;
use oximeter_instruments::kstat::CollectionDetails;
use oximeter_instruments::kstat::Error as KstatError;
use oximeter_instruments::kstat::KstatSampler;
use oximeter_instruments::kstat::TargetId;
use sled_hardware::Baseboard;
use slog::Logger;
use std::collections::BTreeMap;
use std::time::Duration;
use uuid::Uuid;

cfg_if::cfg_if! {
if #[cfg(target_os = "illumos")] {
use oximeter_instruments::kstat::link;
use oximeter_instruments::kstat::CollectionDetails;
use oximeter_instruments::kstat::Error as KstatError;
use oximeter_instruments::kstat::KstatSampler;
use oximeter_instruments::kstat::TargetId;
use std::collections::BTreeMap;
} else {
use anyhow::anyhow;
}
}

/// The interval on which we ask `oximeter` to poll us for metric data.
pub(crate) const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(30);

Expand All @@ -26,9 +33,14 @@ pub(crate) const LINK_SAMPLE_INTERVAL: Duration = Duration::from_secs(10);
/// An error during sled-agent metric production.
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[cfg(target_os = "illumos")]
#[error("Kstat-based metric failure")]
Kstat(#[source] KstatError),

#[cfg(not(target_os = "illumos"))]
#[error("Kstat-based metric failure")]
Kstat(#[source] anyhow::Error),

#[error("Failed to insert metric producer into registry")]
Registry(#[source] MetricsError),

Expand All @@ -45,11 +57,15 @@ pub enum Error {
// pattern, but until we have more statistics, it's not clear whether that's
// worth it right now.
#[derive(Clone, Debug)]
// NOTE: The ID fields aren't used on non-illumos systems, rather than changing
// the name of fields that are not yet used.
#[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
pub struct MetricsManager {
sled_id: Uuid,
rack_id: Uuid,
baseboard: Baseboard,
_log: Logger,
#[cfg(target_os = "illumos")]
kstat_sampler: KstatSampler,
// TODO-scalability: We may want to generalize this to store any kind of
// tracked target, and use a naming scheme that allows us pick out which
Expand All @@ -60,6 +76,7 @@ pub struct MetricsManager {
// for disks or memory. If we wanted to guarantee uniqueness, we could
// namespace them internally, e.g., `"datalink:{link_name}"` would be the
// real key.
#[cfg(target_os = "illumos")]
tracked_links: BTreeMap<String, TargetId>,
registry: ProducerRegistry,
}
Expand All @@ -75,18 +92,26 @@ impl MetricsManager {
baseboard: Baseboard,
log: Logger,
) -> Result<Self, Error> {
let kstat_sampler = KstatSampler::new(&log).map_err(Error::Kstat)?;
let registry = ProducerRegistry::with_id(sled_id);
registry
.register_producer(kstat_sampler.clone())
.map_err(Error::Registry)?;

cfg_if::cfg_if! {
if #[cfg(target_os = "illumos")] {
let kstat_sampler = KstatSampler::new(&log).map_err(Error::Kstat)?;
registry
.register_producer(kstat_sampler.clone())
.map_err(Error::Registry)?;
let tracked_links = BTreeMap::new();
}
}
Ok(Self {
sled_id,
rack_id,
baseboard,
_log: log,
#[cfg(target_os = "illumos")]
kstat_sampler,
tracked_links: BTreeMap::new(),
#[cfg(target_os = "illumos")]
tracked_links,
registry,
})
}
Expand All @@ -95,7 +120,10 @@ impl MetricsManager {
pub fn registry(&self) -> &ProducerRegistry {
&self.registry
}
}

#[cfg(target_os = "illumos")]
impl MetricsManager {
/// Track metrics for a physical datalink.
pub async fn track_physical_link(
&mut self,
Expand All @@ -106,7 +134,7 @@ impl MetricsManager {
rack_id: self.rack_id,
sled_id: self.sled_id,
serial: self.serial_number(),
hostname: hostname().await?,
hostname: Self::hostname().await?,
link_name: link_name.as_ref().to_string(),
};
let details = CollectionDetails::never(interval);
Expand Down Expand Up @@ -165,13 +193,53 @@ impl MetricsManager {
Baseboard::Pc { identifier, .. } => identifier.clone(),
}
}

async fn hostname() -> Result<String, Error> {
tokio::process::Command::new("hostname")
.env_clear()
.output()
.await
.map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
.map_err(Error::Hostname)
}
}

async fn hostname() -> Result<String, Error> {
tokio::process::Command::new("hostname")
.env_clear()
.output()
.await
.map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
.map_err(Error::Hostname)
#[cfg(not(target_os = "illumos"))]
impl MetricsManager {
/// Track metrics for a physical datalink.
pub async fn track_physical_link(
&mut self,
_link_name: impl AsRef<str>,
_interval: Duration,
) -> Result<(), Error> {
Err(Error::Kstat(anyhow!(
"kstat metrics are not supported on this platform"
)))
}

/// Stop tracking metrics for a datalink.
///
/// This works for both physical and virtual links.
#[allow(dead_code)]
pub async fn stop_tracking_link(
&mut self,
_link_name: impl AsRef<str>,
) -> Result<(), Error> {
Err(Error::Kstat(anyhow!(
"kstat metrics are not supported on this platform"
)))
}

/// Track metrics for a virtual datalink.
#[allow(dead_code)]
pub async fn track_virtual_link(
&self,
_link_name: impl AsRef<str>,
_hostname: impl AsRef<str>,
_interval: Duration,
) -> Result<(), Error> {
Err(Error::Kstat(anyhow!(
"kstat metrics are not supported on this platform"
)))
}
}
2 changes: 1 addition & 1 deletion smf/sled-agent/non-gimlet/config-rss.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ bgp = []
# You can configure multiple uplinks by repeating the following stanza
[[rack_network_config.ports]]
# Routes associated with this port.
routes = [{nexthop = "192.168.1.1", destination = "0.0.0.0/0"}]
routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}]
# Addresses associated with this port.
addresses = ["192.168.1.30/32"]
# Name of the uplink port. This should always be "qsfp0" when using softnpu.
Expand Down
2 changes: 1 addition & 1 deletion smf/sled-agent/non-gimlet/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ switch_zone_maghemite_links = ["tfportrear0_0"]
data_links = ["net0", "net1"]

[log]
level = "debug"
level = "info"
mode = "file"
path = "/dev/stdout"
if_exists = "append"

0 comments on commit 5cffc6a

Please sign in to comment.