Skip to content

Commit

Permalink
Add tfport-data-link timeseries schema and SMF config adds.
Browse files Browse the repository at this point in the history
Related to oxidecomputer/dendrite#1033.

Adds a new timeseries (derived from sled-data-link), since switch information
is now included in the metadata.
  • Loading branch information
zeeshanlakhani committed Nov 1, 2024
1 parent ab1f377 commit 819eded
Show file tree
Hide file tree
Showing 7 changed files with 270 additions and 30 deletions.
2 changes: 1 addition & 1 deletion common/src/address.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub const MGS_PORT: u16 = 12225;
pub const WICKETD_PORT: u16 = 12226;
pub const BOOTSTRAP_ARTIFACT_PORT: u16 = 12227;
pub const CRUCIBLE_PANTRY_PORT: u16 = 17000;

/// Port appended to each address when building the tfport service's
/// `config/address` SMF property values.
pub const TFPORTD_PORT: u16 = 12231;
pub const NEXUS_INTERNAL_PORT: u16 = 12221;

/// The port on which Nexus exposes its external API on the underlay network.
Expand Down
132 changes: 132 additions & 0 deletions oximeter/oximeter/schema/tfport-data-link.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
format_version = 1

# Target for timeseries collected from a data link on a tfport interface.
# Version 1 identifies each link by its kind and name together with the
# rack, sled, and switch identity fields declared in the `[fields.*]`
# tables below.
[target]
name = "tfport_data_link"
description = "A network data link on a tfport interface"
authz_scope = "fleet"

versions = [
{ version = 1, fields = [ "kind", "link_name", "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] },
]

[fields.kind]
type = "string"
description = "The kind or class of the data link"

[fields.link_name]
type = "string"
description = "Name of the data link"

[fields.rack_id]
type = "uuid"
description = "ID for the link's rack"

[fields.sled_id]
type = "uuid"
description = "ID for the link's sled"

[fields.sled_model]
type = "string"
description = "Model number of the link's sled"

[fields.sled_revision]
type = "u32"
description = "Revision number of the sled"

[fields.sled_serial]
type = "string"
description = "Serial number of the sled"

[fields.switch_id]
type = "uuid"
description = "ID of the switch the link is on"

[fields.switch_fab]
type = "string"
description = "Fabrication plant identifier of the switch the link is on"

[fields.switch_lot]
type = "string"
description = "Lot number of the switch the link is on"

[fields.switch_wafer]
type = "u8"
description = "Wafer number of the switch the link is on"

[fields.switch_wafer_loc_x]
type = "i8"
description = "X-coordinate wafer location of the switch the link is on"

[fields.switch_wafer_loc_y]
type = "i8"
description = "Y-coordinate wafer location of the switch the link is on"

[fields.switch_model]
type = "string"
description = "Model number of the switch the link is on"

[fields.switch_revision]
type = "u32"
description = "Revision number of the switch the link is on"

[fields.switch_serial]
type = "string"
description = "Serial number of the switch the link is on"

[fields.switch_slot]
type = "u16"
description = "Slot number of the switch the link is on"

[[metrics]]
name = "bytes_sent"
description = "Number of bytes sent on the link"
units = "bytes"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "bytes_received"
description = "Number of bytes received on the link"
units = "bytes"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "packets_sent"
description = "Number of packets sent on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "packets_received"
description = "Number of packets received on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "errors_sent"
description = "Number of errors encountered when sending on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]

[[metrics]]
name = "errors_received"
description = "Number of errors encountered when receiving on the link"
units = "count"
datum_type = "cumulative_u64"
versions = [
{ added_in = 1, fields = [] }
]
12 changes: 6 additions & 6 deletions package-manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -717,8 +717,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "b425a6c783b3081613ffa00407f271cae06568c4"
source.sha256 = "5acb29662fa052fc7805716cf761c39bf0cef168c1e33d9fdb259104503203e5"
source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166"
source.sha256 = "a79cc46ce4a994ca245c8d52f157521c4c32399ae8b6e6dbb4b54126c7d6dc6b"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -744,8 +744,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "b425a6c783b3081613ffa00407f271cae06568c4"
source.sha256 = "2c1f901ac96028264fd904fb197d1aaef522432ad16097d3606321101813dfde"
source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166"
source.sha256 = "05ae723439b432f66e76427805bb2d0fa28603dc28c22624f2a6f872fa6b3627"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -764,8 +764,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "b425a6c783b3081613ffa00407f271cae06568c4"
source.sha256 = "94d1231d5b4ba5ea3cd7ce90ea0c46d43c87bfdd3e694cc99a78f8578ba1b9f7"
source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166"
source.sha256 = "982a28193410e1641e0f8ac015c294610dc1bfd162bae8e68b9ba8ebf3800e75"
output.type = "zone"
output.intermediate_only = true

Expand Down
16 changes: 8 additions & 8 deletions sled-agent/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,15 @@ async fn remove_datalink(
Ok(_) => {
debug!(
log,
"Removed VNIC from tracked links";
"removed VNIC from tracked links";
"link_name" => name,
);
}
Err(err) => {
error!(
log,
"Failed to remove VNIC from kstat sampler, \
metrics may still be produced for it";
"failed to remove VNIC from kstat sampler, \
metrics may still be produced for it";
"link_name" => name,
"error" => ?err,
);
Expand Down Expand Up @@ -263,7 +263,7 @@ async fn add_datalink(
Ok(id) => {
debug!(
log,
"Added new link to kstat sampler";
"added new link to kstat sampler";
"link_name" => entry.key(),
"link_kind" => %link.kind(),
"zone_name" => %link.zone_name(),
Expand All @@ -273,8 +273,8 @@ async fn add_datalink(
Err(err) => {
error!(
log,
"Failed to add VNIC to kstat sampler, \
no metrics will be collected for it";
"failed to add VNIC to kstat sampler, \
no metrics will be collected for it";
"link_name" => entry.key(),
"link_kind" => %link.kind(),
"zone_name" => %link.zone_name(),
Expand Down Expand Up @@ -310,14 +310,14 @@ async fn sync_sled_datalinks(
Ok(_) => {
debug!(
log,
"Updated link already tracked by kstat sampler";
"updated link already tracked by kstat sampler";
"link_name" => link_name,
);
}
Err(err) => {
error!(
log,
"Failed to update link already tracked by kstat sampler";
"failed to update link already tracked by kstat sampler";
"link_name" => link_name,
"error" => ?err,
);
Expand Down
128 changes: 118 additions & 10 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ use omicron_common::address::LLDP_PORT;
use omicron_common::address::MGS_PORT;
use omicron_common::address::RACK_PREFIX;
use omicron_common::address::SLED_PREFIX;
use omicron_common::address::TFPORTD_PORT;
use omicron_common::address::WICKETD_NEXUS_PROXY_PORT;
use omicron_common::address::WICKETD_PORT;
use omicron_common::address::{
Expand Down Expand Up @@ -2852,17 +2853,67 @@ impl ServiceManager {
SwitchService::Tfport { pkt_source, asic } => {
info!(self.inner.log, "Setting up tfport service");
let mut tfport_config =
PropertyGroupBuilder::new("config")
PropertyGroupBuilder::new("config");

tfport_config = tfport_config
.add_property(
"host",
"astring",
&format!("[{}]", Ipv6Addr::LOCALHOST),
)
.add_property(
"port",
"astring",
&format!("{}", DENDRITE_PORT),
);
if let Some(i) = info {
tfport_config = tfport_config
.add_property(
"rack_id",
"astring",
&i.rack_id.to_string(),
)
.add_property(
"sled_id",
"astring",
&i.config
.sled_identifiers
.sled_id
.to_string(),
)
.add_property(
"sled_model",
"astring",
&i.config
.sled_identifiers
.model
.to_string(),
)
.add_property(
"host",
"sled_revision",
"astring",
&format!("[{}]", Ipv6Addr::LOCALHOST),
&i.config
.sled_identifiers
.revision
.to_string(),
)
.add_property(
"port",
"sled_serial",
"astring",
&format!("{}", DENDRITE_PORT),
&i.config
.sled_identifiers
.serial
.to_string(),
);
}

for address in addresses {
tfport_config = tfport_config.add_property(
"address",
"astring",
&format!("[{}]:{}", address, TFPORTD_PORT),
);
}

let is_gimlet = is_gimlet().map_err(|e| {
Error::Underlay(
Expand Down Expand Up @@ -2903,6 +2954,7 @@ impl ServiceManager {

if is_gimlet
|| asic == &DendriteAsic::SoftNpuPropolisDevice
|| asic == &DendriteAsic::TofinoAsic
{
tfport_config = tfport_config.add_property(
"pkt_source",
Expand Down Expand Up @@ -4426,7 +4478,7 @@ impl ServiceManager {
} else {
info!(
self.inner.log,
"no rack_id/sled_id available yet"
"no sled info available yet"
);
}
smfh.delpropvalue_default_instance(
Expand Down Expand Up @@ -4500,10 +4552,66 @@ impl ServiceManager {
smfh.refresh()?;
info!(self.inner.log, "refreshed lldpd service with new configuration")
}
SwitchService::Tfport { .. } => {
// Since tfport and dpd communicate using localhost,
// the tfport service shouldn't need to be
// restarted.
SwitchService::Tfport { pkt_source, asic } => {
info!(self.inner.log, "configuring tfport service");
if let Some(info) = self.inner.sled_info.get() {
smfh.setprop_default_instance(
"config/rack_id",
info.rack_id,
)?;
smfh.setprop_default_instance(
"config/sled_id",
info.config.sled_identifiers.sled_id,
)?;
smfh.setprop_default_instance(
"config/sled_model",
info.config
.sled_identifiers
.model
.to_string(),
)?;
smfh.setprop_default_instance(
"config/sled_revision",
info.config.sled_identifiers.revision,
)?;
smfh.setprop_default_instance(
"config/sled_serial",
info.config
.sled_identifiers
.serial
.to_string(),
)?;
} else {
info!(
self.inner.log,
"no sled info available yet"
);
}
smfh.delpropvalue_default_instance(
"config/address",
"*",
)?;
for address in &request.addresses {
smfh.addpropvalue_type_default_instance(
"config/address",
&format!("[{}]:{}", address, TFPORTD_PORT),
"astring",
)?;
}

match asic {
DendriteAsic::SoftNpuPropolisDevice
| DendriteAsic::TofinoAsic => {
smfh.setprop_default_instance(
"config/pkt_source",
pkt_source,
)?;
}
_ => {}
}

smfh.refresh()?;
info!(self.inner.log, "refreshed tfport service with new configuration")
}
SwitchService::Pumpkind { .. } => {
// Unless we want to plumb through the "only log
Expand Down
Loading

0 comments on commit 819eded

Please sign in to comment.