From 819ededa46ec4eb562ed37bdf2aa2357f38808c5 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Tue, 15 Oct 2024 20:32:21 +0000 Subject: [PATCH] Add tfport-data-link timeseries schema and SMF config adds. Related to https://github.com/oxidecomputer/dendrite/pull/1033. New timeseries (from sled-data-link) as we've added switch information in the metadata. --- common/src/address.rs | 2 +- .../oximeter/schema/tfport-data-link.toml | 132 ++++++++++++++++++ package-manifest.toml | 12 +- sled-agent/src/metrics.rs | 16 +-- sled-agent/src/services.rs | 128 +++++++++++++++-- tools/dendrite_openapi_version | 4 +- tools/dendrite_stub_checksums | 6 +- 7 files changed, 270 insertions(+), 30 deletions(-) create mode 100644 oximeter/oximeter/schema/tfport-data-link.toml diff --git a/common/src/address.rs b/common/src/address.rs index 7cf00d5228c..8d2e9108df8 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -48,7 +48,7 @@ pub const MGS_PORT: u16 = 12225; pub const WICKETD_PORT: u16 = 12226; pub const BOOTSTRAP_ARTIFACT_PORT: u16 = 12227; pub const CRUCIBLE_PANTRY_PORT: u16 = 17000; - +pub const TFPORTD_PORT: u16 = 12231; pub const NEXUS_INTERNAL_PORT: u16 = 12221; /// The port on which Nexus exposes its external API on the underlay network. 
diff --git a/oximeter/oximeter/schema/tfport-data-link.toml b/oximeter/oximeter/schema/tfport-data-link.toml new file mode 100644 index 00000000000..6492733227d --- /dev/null +++ b/oximeter/oximeter/schema/tfport-data-link.toml @@ -0,0 +1,132 @@ +format_version = 1 + +[target] +name = "tfport_data_link" +description = "A network data link on a tfport interface" +authz_scope = "fleet" + +versions = [ + { version = 1, fields = [ "kind", "link_name", "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] }, +] + +[fields.kind] +type = "string" +description = "The kind or class of the data link" + +[fields.link_name] +type = "string" +description = "Name of the data link" + +[fields.rack_id] +type = "uuid" +description = "ID for the link's rack" + +[fields.sled_id] +type = "uuid" +description = "ID for the link's sled" + +[fields.sled_model] +type = "string" +description = "Model number of the link's sled" + +[fields.sled_revision] +type = "u32" +description = "Revision number of the sled" + +[fields.sled_serial] +type = "string" +description = "Serial number of the sled" + +[fields.switch_id] +type = "uuid" +description = "ID of the switch the link is on" + +[fields.switch_fab] +type = "string" +description = "Fabrication plant identifier of the switch the link is on" + +[fields.switch_lot] +type = "string" +description = "Lot number of the switch the link is on" + +[fields.switch_wafer] +type = "u8" +description = "Wafer number of the switch the link is on" + +[fields.switch_wafer_loc_x] +type = "i8" +description = "X-coordinate wafer location of the switch the link is on" + +[fields.switch_wafer_loc_y] +type = "i8" +description = "Y-coordinate wafer location of the switch the link is on" + +[fields.switch_model] +type = "string" +description = "Model number of the switch the link is on" + 
+[fields.switch_revision] +type = "u32" +description = "Revision number of the switch the link is on" + +[fields.switch_serial] +type = "string" +description = "Serial number of the switch the link is on" + +[fields.switch_slot] +type = "u16" +description = "Slot number of the switch the link is on" + +[[metrics]] +name = "bytes_sent" +description = "Number of bytes sent on the link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "bytes_received" +description = "Number of bytes received on the link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "packets_sent" +description = "Number of packets sent on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "packets_received" +description = "Number of packets received on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "errors_sent" +description = "Number of errors encountered when sending on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "errors_received" +description = "Number of errors encountered when receiving on the link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] diff --git a/package-manifest.toml b/package-manifest.toml index a3f69dcd57e..c711b1d328b 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -717,8 +717,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "b425a6c783b3081613ffa00407f271cae06568c4" -source.sha256 = "5acb29662fa052fc7805716cf761c39bf0cef168c1e33d9fdb259104503203e5" +source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166" +source.sha256 = "a79cc46ce4a994ca245c8d52f157521c4c32399ae8b6e6dbb4b54126c7d6dc6b" output.type = "zone" output.intermediate_only = true @@ -744,8 +744,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "b425a6c783b3081613ffa00407f271cae06568c4" -source.sha256 = "2c1f901ac96028264fd904fb197d1aaef522432ad16097d3606321101813dfde" +source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166" +source.sha256 = "05ae723439b432f66e76427805bb2d0fa28603dc28c22624f2a6f872fa6b3627" output.type = "zone" output.intermediate_only = true @@ -764,8 +764,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "b425a6c783b3081613ffa00407f271cae06568c4" -source.sha256 = "94d1231d5b4ba5ea3cd7ce90ea0c46d43c87bfdd3e694cc99a78f8578ba1b9f7" +source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166" +source.sha256 = "982a28193410e1641e0f8ac015c294610dc1bfd162bae8e68b9ba8ebf3800e75" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs index ef9edd9a079..d07abb94ade 100644 --- a/sled-agent/src/metrics.rs +++ b/sled-agent/src/metrics.rs @@ -223,15 +223,15 @@ async fn remove_datalink( Ok(_) => { debug!( log, - "Removed VNIC from tracked links"; + "removed VNIC from tracked links"; "link_name" => name, ); } Err(err) => { error!( log, - "Failed to remove VNIC from kstat sampler, \ - metrics may still be produced for it"; + "failed to remove VNIC from kstat sampler, \ + metrics may still be produced for it"; "link_name" => name, "error" => ?err, ); @@ -263,7 +263,7 @@ async fn add_datalink( Ok(id) => { debug!( log, - 
"Added new link to kstat sampler"; + "added new link to kstat sampler"; "link_name" => entry.key(), "link_kind" => %link.kind(), "zone_name" => %link.zone_name(), @@ -273,8 +273,8 @@ async fn add_datalink( Err(err) => { error!( log, - "Failed to add VNIC to kstat sampler, \ - no metrics will be collected for it"; + "failed to add VNIC to kstat sampler, \ + no metrics will be collected for it"; "link_name" => entry.key(), "link_kind" => %link.kind(), "zone_name" => %link.zone_name(), @@ -310,14 +310,14 @@ async fn sync_sled_datalinks( Ok(_) => { debug!( log, - "Updated link already tracked by kstat sampler"; + "updated link already tracked by kstat sampler"; "link_name" => link_name, ); } Err(err) => { error!( log, - "Failed to update link already tracked by kstat sampler"; + "failed to update link already tracked by kstat sampler"; "link_name" => link_name, "error" => ?err, ); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 9d23638c9e9..b3871a6c623 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -77,6 +77,7 @@ use omicron_common::address::LLDP_PORT; use omicron_common::address::MGS_PORT; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; +use omicron_common::address::TFPORTD_PORT; use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{ @@ -2852,17 +2853,67 @@ impl ServiceManager { SwitchService::Tfport { pkt_source, asic } => { info!(self.inner.log, "Setting up tfport service"); let mut tfport_config = - PropertyGroupBuilder::new("config") + PropertyGroupBuilder::new("config"); + + tfport_config = tfport_config + .add_property( + "host", + "astring", + &format!("[{}]", Ipv6Addr::LOCALHOST), + ) + .add_property( + "port", + "astring", + &format!("{}", DENDRITE_PORT), + ); + if let Some(i) = info { + tfport_config = tfport_config + .add_property( + "rack_id", + "astring", + &i.rack_id.to_string(), + ) + 
.add_property( + "sled_id", + "astring", + &i.config + .sled_identifiers + .sled_id + .to_string(), + ) + .add_property( + "sled_model", + "astring", + &i.config + .sled_identifiers + .model + .to_string(), + ) .add_property( - "host", + "sled_revision", "astring", - &format!("[{}]", Ipv6Addr::LOCALHOST), + &i.config + .sled_identifiers + .revision + .to_string(), ) .add_property( - "port", + "sled_serial", "astring", - &format!("{}", DENDRITE_PORT), + &i.config + .sled_identifiers + .serial + .to_string(), ); + } + + for address in addresses { + tfport_config = tfport_config.add_property( + "address", + "astring", + &format!("[{}]:{}", address, TFPORTD_PORT), + ); + } let is_gimlet = is_gimlet().map_err(|e| { Error::Underlay( @@ -2903,6 +2954,7 @@ impl ServiceManager { if is_gimlet || asic == &DendriteAsic::SoftNpuPropolisDevice + || asic == &DendriteAsic::TofinoAsic { tfport_config = tfport_config.add_property( "pkt_source", @@ -4426,7 +4478,7 @@ impl ServiceManager { } else { info!( self.inner.log, - "no rack_id/sled_id available yet" + "no sled info available yet" ); } smfh.delpropvalue_default_instance( @@ -4500,10 +4552,66 @@ impl ServiceManager { smfh.refresh()?; info!(self.inner.log, "refreshed lldpd service with new configuration") } - SwitchService::Tfport { .. } => { - // Since tfport and dpd communicate using localhost, - // the tfport service shouldn't need to be - // restarted. 
+ SwitchService::Tfport { pkt_source, asic } => { + info!(self.inner.log, "configuring tfport service"); + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop_default_instance( + "config/rack_id", + info.rack_id, + )?; + smfh.setprop_default_instance( + "config/sled_id", + info.config.sled_identifiers.sled_id, + )?; + smfh.setprop_default_instance( + "config/sled_model", + info.config + .sled_identifiers + .model + .to_string(), + )?; + smfh.setprop_default_instance( + "config/sled_revision", + info.config.sled_identifiers.revision, + )?; + smfh.setprop_default_instance( + "config/sled_serial", + info.config + .sled_identifiers + .serial + .to_string(), + )?; + } else { + info!( + self.inner.log, + "no sled info available yet" + ); + } + smfh.delpropvalue_default_instance( + "config/address", + "*", + )?; + for address in &request.addresses { + smfh.addpropvalue_type_default_instance( + "config/address", + &format!("[{}]:{}", address, TFPORTD_PORT), + "astring", + )?; + } + + match asic { + DendriteAsic::SoftNpuPropolisDevice + | DendriteAsic::TofinoAsic => { + smfh.setprop_default_instance( + "config/pkt_source", + pkt_source, + )?; + } + _ => {} + } + + smfh.refresh()?; + info!(self.inner.log, "refreshed tfport service with new configuration") } SwitchService::Pumpkind { .. 
} => { // Unless we want to plumb through the "only log diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index eabe11c065b..b63386197ec 100755 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="b425a6c783b3081613ffa00407f271cae06568c4" -SHA2="20567cfb7028ab9ca08ce9423c9818f9151519dea789c3cb3016bc4cbb3927a4" +COMMIT="a502a30b43b32ad4c884ab4e1e6d87bc01387166" +SHA2="b1bcf5ad3c2ce556d861367aa2942340a7ccfe592c83e96dd06923cc151ac26b" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 2b24e4502cc..b3edb50bd81 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="5acb29662fa052fc7805716cf761c39bf0cef168c1e33d9fdb259104503203e5" -CIDL_SHA256_LINUX_DPD="78c5f9d3a71767e1b1de44ad289ccef5ae79faed8a19e4791f6b32f3f948b4c1" -CIDL_SHA256_LINUX_SWADM="731abee76a514f677bcbd58af42efca3ecdbdd25d03e45419e6b32e3dc04000d" +CIDL_SHA256_ILLUMOS="a79cc46ce4a994ca245c8d52f157521c4c32399ae8b6e6dbb4b54126c7d6dc6b" +CIDL_SHA256_LINUX_DPD="98c79855d199e8c56b7c038641b4d5b3de9bfa9ac610b65a6116bd36f0702ecf" +CIDL_SHA256_LINUX_SWADM="cce690cba4bd585c0257bdc95cc257d04534c6b695d154341c9915cebd8124f9"