Skip to content

Commit

Permalink
Add tfport-data-link timeseries schema and SMF config additions.
Browse files Browse the repository at this point in the history
Related to oxidecomputer/dendrite#1033.

New timeseries (derived from sled-data-link), since we've added switch
information to the metadata.
  • Loading branch information
zeeshanlakhani committed Nov 1, 2024
1 parent 6fd24c7 commit 701b06f
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 29 deletions.
2 changes: 1 addition & 1 deletion common/src/address.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub const MGS_PORT: u16 = 12225;
pub const WICKETD_PORT: u16 = 12226;
pub const BOOTSTRAP_ARTIFACT_PORT: u16 = 12227;
pub const CRUCIBLE_PANTRY_PORT: u16 = 17000;

pub const TFPORTD_PORT: u16 = 12231;
pub const NEXUS_INTERNAL_PORT: u16 = 12221;

/// The port on which Nexus exposes its external API on the underlay network.
Expand Down
132 changes: 132 additions & 0 deletions oximeter/oximeter/schema/tfport-data-link.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
format_version = 1

[target]
name = "tfport_data_link"
description = "A network data link on a tfport interface"
authz_scope = "fleet"

versions = [
{ version = 1, fields = [ "kind", "link_name", "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] },
]

[fields.kind]
type = "string"
description = "The kind or class of the data link"

[fields.link_name]
type = "string"
description = "Name of the data link"

[fields.rack_id]
type = "uuid"
description = "ID for the link's rack"

[fields.sled_id]
type = "uuid"
description = "ID for the link's sled"

[fields.sled_model]
type = "string"
description = "Model number of the link's sled"

[fields.sled_revision]
type = "u32"
description = "Revision number of the sled"

[fields.sled_serial]
type = "string"
description = "Serial number of the sled"

[fields.switch_id]
type = "uuid"
description = "ID of the switch the link is on"

[fields.switch_fab]
type = "string"
description = "Fabrication plant identifier of the switch the link is on"

[fields.switch_lot]
type = "string"
description = "Lot number of the switch the link is on"

[fields.switch_wafer]
type = "u8"
description = "Wafer number of the switch the link is on"

[fields.switch_wafer_loc_x]
type = "i8"
description = "X-coordinate wafer location of the switch the link is on"

[fields.switch_wafer_loc_y]
type = "i8"
description = "Y-coordinate wafer location of the switch the link is on"

[fields.switch_model]
type = "string"
description = "Model number of the switch the link is on"

[fields.switch_revision]
type = "u32"
description = "Revision number of the switch the link is on"

[fields.switch_serial]
type = "string"
description = "Serial number of the switch the link is on"

[fields.switch_slot]
type = "u16"
description = "Slot number of the switch the link is on"

[[metrics]]
name = "bytes_sent"
description = "Number of bytes sent on the link"
units = "bytes"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "bytes_received"
description = "Number of bytes received on the link"
units = "bytes"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "packets_sent"
description = "Number of packets sent on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "packets_received"
description = "Number of packets received on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "errors_sent"
description = "Number of errors encountered when sending on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "errors_received"
description = "Number of errors encountered when receiving on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]
12 changes: 6 additions & 6 deletions package-manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -719,8 +719,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "acea62c8838bae83b4849fb60463ceb26780449c"
source.sha256 = "09a4bced2d93e1dd492168aee4034efc982eb695503d0356a52ad0dc1a3b0624"
source.commit = "0647fb31bdee8d0beee760961dcdb549e4a9c450"
source.sha256 = "b06803d6d788f1b6ba4200548bb5f976dc043481837ecff20e6c1095235bbe82"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -746,8 +746,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "acea62c8838bae83b4849fb60463ceb26780449c"
source.sha256 = "6c9576b2132d525cece9c9f39adfd4318291c58866535c9fde2739085af7ee3a"
source.commit = "0647fb31bdee8d0beee760961dcdb549e4a9c450"
source.sha256 = "47b771e6159ab6cd4b90998c767cb1cc9c82e6554493147711a4a57a5302ffd8"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -766,8 +766,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "acea62c8838bae83b4849fb60463ceb26780449c"
source.sha256 = "d511e5173679b1b68ffe6e38bf01999bc17621a5618c558e4bb701dfbb46053c"
source.commit = "0647fb31bdee8d0beee760961dcdb549e4a9c450"
source.sha256 = "edbd01f1ad1e5e7d12a96eaffe6efd5d40c194d39427c633ea385f8f98faad14"
output.type = "zone"
output.intermediate_only = true

Expand Down
16 changes: 8 additions & 8 deletions sled-agent/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,15 @@ async fn remove_datalink(
Ok(_) => {
debug!(
log,
"Removed VNIC from tracked links";
"removed VNIC from tracked links";
"link_name" => name,
);
}
Err(err) => {
error!(
log,
"Failed to remove VNIC from kstat sampler, \
metrics may still be produced for it";
"failed to remove VNIC from kstat sampler, \
metrics may still be produced for it";
"link_name" => name,
"error" => ?err,
);
Expand Down Expand Up @@ -263,7 +263,7 @@ async fn add_datalink(
Ok(id) => {
debug!(
log,
"Added new link to kstat sampler";
"added new link to kstat sampler";
"link_name" => entry.key(),
"link_kind" => %link.kind(),
"zone_name" => %link.zone_name(),
Expand All @@ -273,8 +273,8 @@ async fn add_datalink(
Err(err) => {
error!(
log,
"Failed to add VNIC to kstat sampler, \
no metrics will be collected for it";
"failed to add VNIC to kstat sampler, \
no metrics will be collected for it";
"link_name" => entry.key(),
"link_kind" => %link.kind(),
"zone_name" => %link.zone_name(),
Expand Down Expand Up @@ -310,14 +310,14 @@ async fn sync_sled_datalinks(
Ok(_) => {
debug!(
log,
"Updated link already tracked by kstat sampler";
"updated link already tracked by kstat sampler";
"link_name" => link_name,
);
}
Err(err) => {
error!(
log,
"Failed to update link already tracked by kstat sampler";
"failed to update link already tracked by kstat sampler";
"link_name" => link_name,
"error" => ?err,
);
Expand Down
128 changes: 118 additions & 10 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ use omicron_common::address::LLDP_PORT;
use omicron_common::address::MGS_PORT;
use omicron_common::address::RACK_PREFIX;
use omicron_common::address::SLED_PREFIX;
use omicron_common::address::TFPORTD_PORT;
use omicron_common::address::WICKETD_NEXUS_PROXY_PORT;
use omicron_common::address::WICKETD_PORT;
use omicron_common::address::{
Expand Down Expand Up @@ -2837,17 +2838,67 @@ impl ServiceManager {
SwitchService::Tfport { pkt_source, asic } => {
info!(self.inner.log, "Setting up tfport service");
let mut tfport_config =
PropertyGroupBuilder::new("config")
PropertyGroupBuilder::new("config");

tfport_config = tfport_config
.add_property(
"host",
"astring",
&format!("[{}]", Ipv6Addr::LOCALHOST),
)
.add_property(
"port",
"astring",
&format!("{}", DENDRITE_PORT),
);
if let Some(i) = info {
tfport_config = tfport_config
.add_property(
"rack_id",
"astring",
&i.rack_id.to_string(),
)
.add_property(
"sled_id",
"astring",
&i.config
.sled_identifiers
.sled_id
.to_string(),
)
.add_property(
"sled_model",
"astring",
&i.config
.sled_identifiers
.model
.to_string(),
)
.add_property(
"host",
"sled_revision",
"astring",
&format!("[{}]", Ipv6Addr::LOCALHOST),
&i.config
.sled_identifiers
.revision
.to_string(),
)
.add_property(
"port",
"sled_serial",
"astring",
&format!("{}", DENDRITE_PORT),
&i.config
.sled_identifiers
.serial
.to_string(),
);
}

for address in addresses {
tfport_config = tfport_config.add_property(
"address",
"astring",
&format!("[{}]:{}", address, TFPORTD_PORT),
);
}

let is_gimlet = is_gimlet().map_err(|e| {
Error::Underlay(
Expand Down Expand Up @@ -2888,6 +2939,7 @@ impl ServiceManager {

if is_gimlet
|| asic == &DendriteAsic::SoftNpuPropolisDevice
|| asic == &DendriteAsic::TofinoAsic
{
tfport_config = tfport_config.add_property(
"pkt_source",
Expand Down Expand Up @@ -4411,7 +4463,7 @@ impl ServiceManager {
} else {
info!(
self.inner.log,
"no rack_id/sled_id available yet"
"no sled info available yet"
);
}
smfh.delpropvalue_default_instance(
Expand Down Expand Up @@ -4485,10 +4537,66 @@ impl ServiceManager {
smfh.refresh()?;
info!(self.inner.log, "refreshed lldpd service with new configuration")
}
SwitchService::Tfport { .. } => {
// Since tfport and dpd communicate using localhost,
// the tfport service shouldn't need to be
// restarted.
SwitchService::Tfport { pkt_source, asic } => {
info!(self.inner.log, "configuring tfport service");
if let Some(info) = self.inner.sled_info.get() {
smfh.setprop_default_instance(
"config/rack_id",
info.rack_id,
)?;
smfh.setprop_default_instance(
"config/sled_id",
info.config.sled_identifiers.sled_id,
)?;
smfh.setprop_default_instance(
"config/sled_model",
info.config
.sled_identifiers
.model
.to_string(),
)?;
smfh.setprop_default_instance(
"config/sled_revision",
info.config.sled_identifiers.revision,
)?;
smfh.setprop_default_instance(
"config/sled_serial",
info.config
.sled_identifiers
.serial
.to_string(),
)?;
} else {
info!(
self.inner.log,
"no sled info available yet"
);
}
smfh.delpropvalue_default_instance(
"config/address",
"*",
)?;
for address in &request.addresses {
smfh.addpropvalue_type_default_instance(
"config/address",
&format!("[{}]:{}", address, TFPORTD_PORT),
"astring",
)?;
}

match asic {
DendriteAsic::SoftNpuPropolisDevice
| DendriteAsic::TofinoAsic => {
smfh.setprop_default_instance(
"config/pkt_source",
pkt_source,
)?;
}
_ => {}
}

smfh.refresh()?;
info!(self.inner.log, "refreshed tfport service with new configuration")
}
SwitchService::Pumpkind { .. } => {
// Unless we want to plumb through the "only log
Expand Down
Loading

0 comments on commit 701b06f

Please sign in to comment.