From 701b06f401a6f4c8891c2e670fe9083803a6d331 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Tue, 15 Oct 2024 20:32:21 +0000 Subject: [PATCH] Add tfport-data-link timeseries schema and SMF config adds. Related to https://github.com/oxidecomputer/dendrite/pull/1033. New timeseries (from sled-data-link) as we've added switch information in the metadata. --- common/src/address.rs | 2 +- .../oximeter/schema/tfport-data-link.toml | 132 ++++++++++++++++++ package-manifest.toml | 12 +- sled-agent/src/metrics.rs | 16 +-- sled-agent/src/services.rs | 128 +++++++++++++++-- tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 6 +- 7 files changed, 269 insertions(+), 29 deletions(-) create mode 100644 oximeter/oximeter/schema/tfport-data-link.toml diff --git a/common/src/address.rs b/common/src/address.rs index 7e6d68ebc8e..a311bd688ed 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -49,7 +49,7 @@ pub const MGS_PORT: u16 = 12225; pub const WICKETD_PORT: u16 = 12226; pub const BOOTSTRAP_ARTIFACT_PORT: u16 = 12227; pub const CRUCIBLE_PANTRY_PORT: u16 = 17000; - +pub const TFPORTD_PORT: u16 = 12231; pub const NEXUS_INTERNAL_PORT: u16 = 12221; /// The port on which Nexus exposes its external API on the underlay network. 
diff --git a/oximeter/oximeter/schema/tfport-data-link.toml b/oximeter/oximeter/schema/tfport-data-link.toml new file mode 100644 index 00000000000..6492733227d --- /dev/null +++ b/oximeter/oximeter/schema/tfport-data-link.toml @@ -0,0 +1,132 @@ +format_version = 1 + +[target] +name = "tfport_data_link" +description = "A network data link on a tfport interface" +authz_scope = "fleet" + +versions = [ + { version = 1, fields = [ "kind", "link_name", "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] }, +] + +[fields.kind] +type = "string" +description = "The kind or class of the data link" + +[fields.link_name] +type = "string" +description = "Name of the data link" + +[fields.rack_id] +type = "uuid" +description = "ID for the link's rack" + +[fields.sled_id] +type = "uuid" +description = "ID for the link's sled" + +[fields.sled_model] +type = "string" +description = "Model number of the link's sled" + +[fields.sled_revision] +type = "u32" +description = "Revision number of the sled" + +[fields.sled_serial] +type = "string" +description = "Serial number of the sled" + +[fields.switch_id] +type = "uuid" +description = "ID of the switch the link is on" + +[fields.switch_fab] +type = "string" +description = "Fabrication plant identifier of the switch the link is on" + +[fields.switch_lot] +type = "string" +description = "Lot number of the switch the link is on" + +[fields.switch_wafer] +type = "u8" +description = "Wafer number of the switch the link is on" + +[fields.switch_wafer_loc_x] +type = "i8" +description = "X-coordinate wafer location of the switch the link is on" + +[fields.switch_wafer_loc_y] +type = "i8" +description = "Y-coordinate wafer location of the switch the link is on" + +[fields.switch_model] +type = "string" +description = "The model number of the switch the link is on" + 
+[fields.switch_revision] +type = "u32" +description = "Revision number of the switch the link is on" + +[fields.switch_serial] +type = "string" +description = "Serial number of the switch the link is on" + +[fields.switch_slot] +type = "u16" +description = "Slot number of the switch the link is on" + +[[metrics]] +name = "bytes_sent" +description = "Number of bytes sent on the link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "bytes_received" +description = "Number of bytes received on the link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "packets_sent" +description = "Number of packets sent on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "packets_received" +description = "Number of packets received on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "errors_sent" +description = "Number of errors encountered when sending on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "errors_received" +description = "Number of errors encountered when receiving on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] diff --git a/package-manifest.toml b/package-manifest.toml index 4d40d1902aa..0acf9ee0b0d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -719,8 +719,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "acea62c8838bae83b4849fb60463ceb26780449c" -source.sha256 = "09a4bced2d93e1dd492168aee4034efc982eb695503d0356a52ad0dc1a3b0624" +source.commit = "0647fb31bdee8d0beee760961dcdb549e4a9c450" +source.sha256 = "b06803d6d788f1b6ba4200548bb5f976dc043481837ecff20e6c1095235bbe82" output.type = "zone" output.intermediate_only = true @@ -746,8 +746,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "acea62c8838bae83b4849fb60463ceb26780449c" -source.sha256 = "6c9576b2132d525cece9c9f39adfd4318291c58866535c9fde2739085af7ee3a" +source.commit = "0647fb31bdee8d0beee760961dcdb549e4a9c450" +source.sha256 = "47b771e6159ab6cd4b90998c767cb1cc9c82e6554493147711a4a57a5302ffd8" output.type = "zone" output.intermediate_only = true @@ -766,8 +766,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "acea62c8838bae83b4849fb60463ceb26780449c" -source.sha256 = "d511e5173679b1b68ffe6e38bf01999bc17621a5618c558e4bb701dfbb46053c" +source.commit = "0647fb31bdee8d0beee760961dcdb549e4a9c450" +source.sha256 = "edbd01f1ad1e5e7d12a96eaffe6efd5d40c194d39427c633ea385f8f98faad14" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs index ef9edd9a079..d07abb94ade 100644 --- a/sled-agent/src/metrics.rs +++ b/sled-agent/src/metrics.rs @@ -223,15 +223,15 @@ async fn remove_datalink( Ok(_) => { debug!( log, - "Removed VNIC from tracked links"; + "removed VNIC from tracked links"; "link_name" => name, ); } Err(err) => { error!( log, - "Failed to remove VNIC from kstat sampler, \ - metrics may still be produced for it"; + "failed to remove VNIC from kstat sampler, \ + metrics may still be produced for it"; "link_name" => name, "error" => ?err, ); @@ -263,7 +263,7 @@ async fn add_datalink( Ok(id) => { debug!( log, - 
"Added new link to kstat sampler"; + "added new link to kstat sampler"; "link_name" => entry.key(), "link_kind" => %link.kind(), "zone_name" => %link.zone_name(), @@ -273,8 +273,8 @@ async fn add_datalink( Err(err) => { error!( log, - "Failed to add VNIC to kstat sampler, \ - no metrics will be collected for it"; + "failed to add VNIC to kstat sampler, \ + no metrics will be collected for it"; "link_name" => entry.key(), "link_kind" => %link.kind(), "zone_name" => %link.zone_name(), @@ -310,14 +310,14 @@ async fn sync_sled_datalinks( Ok(_) => { debug!( log, - "Updated link already tracked by kstat sampler"; + "updated link already tracked by kstat sampler"; "link_name" => link_name, ); } Err(err) => { error!( log, - "Failed to update link already tracked by kstat sampler"; + "failed to update link already tracked by kstat sampler"; "link_name" => link_name, "error" => ?err, ); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 62c15de21b9..7bb3e7fcec1 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -72,6 +72,7 @@ use omicron_common::address::LLDP_PORT; use omicron_common::address::MGS_PORT; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; +use omicron_common::address::TFPORTD_PORT; use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{ @@ -2837,17 +2838,67 @@ impl ServiceManager { SwitchService::Tfport { pkt_source, asic } => { info!(self.inner.log, "Setting up tfport service"); let mut tfport_config = - PropertyGroupBuilder::new("config") + PropertyGroupBuilder::new("config"); + + tfport_config = tfport_config + .add_property( + "host", + "astring", + &format!("[{}]", Ipv6Addr::LOCALHOST), + ) + .add_property( + "port", + "astring", + &format!("{}", DENDRITE_PORT), + ); + if let Some(i) = info { + tfport_config = tfport_config + .add_property( + "rack_id", + "astring", + &i.rack_id.to_string(), + ) + 
.add_property( + "sled_id", + "astring", + &i.config + .sled_identifiers + .sled_id + .to_string(), + ) + .add_property( + "sled_model", + "astring", + &i.config + .sled_identifiers + .model + .to_string(), + ) .add_property( - "host", + "sled_revision", "astring", - &format!("[{}]", Ipv6Addr::LOCALHOST), + &i.config + .sled_identifiers + .revision + .to_string(), ) .add_property( - "port", + "sled_serial", "astring", - &format!("{}", DENDRITE_PORT), + &i.config + .sled_identifiers + .serial + .to_string(), ); + } + + for address in addresses { + tfport_config = tfport_config.add_property( + "address", + "astring", + &format!("[{}]:{}", address, TFPORTD_PORT), + ); + } let is_gimlet = is_gimlet().map_err(|e| { Error::Underlay( @@ -2888,6 +2939,7 @@ impl ServiceManager { if is_gimlet || asic == &DendriteAsic::SoftNpuPropolisDevice + || asic == &DendriteAsic::TofinoAsic { tfport_config = tfport_config.add_property( "pkt_source", @@ -4411,7 +4463,7 @@ impl ServiceManager { } else { info!( self.inner.log, - "no rack_id/sled_id available yet" + "no sled info available yet" ); } smfh.delpropvalue_default_instance( @@ -4485,10 +4537,66 @@ impl ServiceManager { smfh.refresh()?; info!(self.inner.log, "refreshed lldpd service with new configuration") } - SwitchService::Tfport { .. } => { - // Since tfport and dpd communicate using localhost, - // the tfport service shouldn't need to be - // restarted. 
+ SwitchService::Tfport { pkt_source, asic } => { + info!(self.inner.log, "configuring tfport service"); + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop_default_instance( + "config/rack_id", + info.rack_id, + )?; + smfh.setprop_default_instance( + "config/sled_id", + info.config.sled_identifiers.sled_id, + )?; + smfh.setprop_default_instance( + "config/sled_model", + info.config + .sled_identifiers + .model + .to_string(), + )?; + smfh.setprop_default_instance( + "config/sled_revision", + info.config.sled_identifiers.revision, + )?; + smfh.setprop_default_instance( + "config/sled_serial", + info.config + .sled_identifiers + .serial + .to_string(), + )?; + } else { + info!( + self.inner.log, + "no sled info available yet" + ); + } + smfh.delpropvalue_default_instance( + "config/address", + "*", + )?; + for address in &request.addresses { + smfh.addpropvalue_type_default_instance( + "config/address", + &format!("[{}]:{}", address, TFPORTD_PORT), + "astring", + )?; + } + + match asic { + DendriteAsic::SoftNpuPropolisDevice + | DendriteAsic::TofinoAsic => { + smfh.setprop_default_instance( + "config/pkt_source", + pkt_source, + )?; + } + _ => {} + } + + smfh.refresh()?; + info!(self.inner.log, "refreshed tfport service with new configuration") } SwitchService::Pumpkind { .. 
} => { // Unless we want to plumb through the "only log diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index d039f4db151..cdfaa2c7ab1 100755 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="acea62c8838bae83b4849fb60463ceb26780449c" +COMMIT="0647fb31bdee8d0beee760961dcdb549e4a9c450" SHA2="f4598904bcc585066c18eebadbd7dd5d23a80c8fda38fb2a9aa20c6499636d07" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index af0209375b3..99b318c434d 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="09a4bced2d93e1dd492168aee4034efc982eb695503d0356a52ad0dc1a3b0624" -CIDL_SHA256_LINUX_DPD="4fd5a8e9a515482bc6ec3707a90c9c28c00556a18871340fad3022bcc3aa43b2" -CIDL_SHA256_LINUX_SWADM="7f0acc2e7128c616d8f8911ced34c26f03951ae68ba70b7f543edb7c44524f86" +CIDL_SHA256_ILLUMOS="b06803d6d788f1b6ba4200548bb5f976dc043481837ecff20e6c1095235bbe82" +CIDL_SHA256_LINUX_DPD="b921d49daa9d644a0a01b92c11fd32edccf9104f0fa2e19b045b3d6ca002923e" +CIDL_SHA256_LINUX_SWADM="0f15487b3dc752d031670e4e94194a0c19952532d72c11a254ce49b608d2f57f"