From 520c4d1e10a010ee8c887902321e969f0bcfd3be Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Wed, 6 Nov 2024 19:03:51 -0500 Subject: [PATCH] Update SMF properties for Dendrite to include sled identifiers (#6793) Includes: - package-manifest update to match the properties added to dpd - removal of client call to get sled identifier information (no longer used) - switch_slot added to oximeter schemas for dendrite/switch-data-link Related: - https://github.com/oxidecomputer/dendrite/pull/1031 --- clients/sled-agent-client/src/lib.rs | 30 ------- .../replicated/13/timeseries-to-delete.txt | 25 ++++++ .../single-node/13/timeseries-to-delete.txt | 25 ++++++ oximeter/oximeter/schema/dendrite.toml | 26 +++++- .../oximeter/schema/switch-data-link.toml | 27 +++++- package-manifest.toml | 12 +-- sled-agent/src/services.rs | 87 ++++++++++++++++--- sled-agent/src/sled_agent.rs | 8 +- tools/dendrite_openapi_version | 4 +- tools/dendrite_stub_checksums | 6 +- 10 files changed, 189 insertions(+), 61 deletions(-) create mode 100644 oximeter/db/schema/replicated/13/timeseries-to-delete.txt create mode 100644 oximeter/db/schema/single-node/13/timeseries-to-delete.txt diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 0398e15e96..800123b116 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -391,36 +391,6 @@ impl From } } -impl From - for types::SledIdentifiers -{ - fn from( - value: omicron_common::api::internal::shared::SledIdentifiers, - ) -> Self { - Self { - model: value.model, - rack_id: value.rack_id, - revision: value.revision, - serial: value.serial, - sled_id: value.sled_id, - } - } -} - -impl From - for omicron_common::api::internal::shared::SledIdentifiers -{ - fn from(value: types::SledIdentifiers) -> Self { - Self { - model: value.model, - rack_id: value.rack_id, - revision: value.revision, - serial: value.serial, - sled_id: value.sled_id, - } - } -} - /// Exposes additional [`Client`] interfaces for use by the test suite. These /// are bonus endpoints, not generated in the real client. #[async_trait] diff --git a/oximeter/db/schema/replicated/13/timeseries-to-delete.txt b/oximeter/db/schema/replicated/13/timeseries-to-delete.txt new file mode 100644 index 0000000000..da3f1d6376 --- /dev/null +++ b/oximeter/db/schema/replicated/13/timeseries-to-delete.txt @@ -0,0 +1,25 @@ +switch_data_link:bytes_sent +switch_data_link:bytes_received +switch_data_link:errors_sent +switch_data_link:errors_received +switch_data_link:receive_crc_error_drops +switch_data_link:receive_buffer_full_drops +switch_data_link:packets_sent +switch_data_link:packets_received +switch_data_link:link_up +switch_data_link:link_enabled +switch_data_link:link_fsm +switch_data_link:pcs_bad_sync_headers +switch_data_link:pcs_errored_blocks +switch_data_link:pcs_block_lock_loss +switch_data_link:pcs_high_ber +switch_data_link:pcs_valid_errors +switch_data_link:pcs_invalid_errors +switch_data_link:pcs_unknown_errors +switch_data_link:pcs_sync_loss +switch_data_link:fec_high_symbol_errors +switch_data_link:fec_sync_aligned +switch_data_link:fec_corrected_blocks +switch_data_link:fec_uncorrected_blocks +switch_data_link:fec_symbol_errors +dendrite:sample_collection_duration diff --git a/oximeter/db/schema/single-node/13/timeseries-to-delete.txt b/oximeter/db/schema/single-node/13/timeseries-to-delete.txt new file mode 100644 index 0000000000..da3f1d6376 --- /dev/null +++ b/oximeter/db/schema/single-node/13/timeseries-to-delete.txt @@ -0,0 +1,25 @@ +switch_data_link:bytes_sent +switch_data_link:bytes_received +switch_data_link:errors_sent +switch_data_link:errors_received +switch_data_link:receive_crc_error_drops +switch_data_link:receive_buffer_full_drops +switch_data_link:packets_sent +switch_data_link:packets_received +switch_data_link:link_up +switch_data_link:link_enabled +switch_data_link:link_fsm +switch_data_link:pcs_bad_sync_headers +switch_data_link:pcs_errored_blocks +switch_data_link:pcs_block_lock_loss +switch_data_link:pcs_high_ber +switch_data_link:pcs_valid_errors +switch_data_link:pcs_invalid_errors +switch_data_link:pcs_unknown_errors +switch_data_link:pcs_sync_loss +switch_data_link:fec_high_symbol_errors +switch_data_link:fec_sync_aligned +switch_data_link:fec_corrected_blocks +switch_data_link:fec_uncorrected_blocks +switch_data_link:fec_symbol_errors +dendrite:sample_collection_duration diff --git a/oximeter/oximeter/schema/dendrite.toml b/oximeter/oximeter/schema/dendrite.toml index e822069a2f..ca1497b447 100644 --- a/oximeter/oximeter/schema/dendrite.toml +++ b/oximeter/oximeter/schema/dendrite.toml @@ -18,7 +18,7 @@ versions = [ # switch for its statistics, which is why these fields are included. # Dendrite may eventually report statistics about itself, or other aspects # not related to the switch, so they belong here, not the target. - { added_in = 1, fields = [ "switch_model", "switch_revision", "switch_id", "switch_serial" ] } + { added_in = 1, fields = [ "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] } ] [fields.rack_id] @@ -53,6 +53,30 @@ description = "Revision number of the switch being managed" type = "uuid" description = "ID of the switch being managed" +[fields.switch_fab] +type = "string" +description = "Fabrication plant identifier of the switch the link is on" + +[fields.switch_lot] +type = "string" +description = "Lot number of the switch the link is on" + +[fields.switch_wafer] +type = "u8" +description = "Wafer number of the switch the link is on" + +[fields.switch_wafer_loc_x] +type = "i16" +description = "X-coordinate wafer location of the switch the link is on" + +[fields.switch_wafer_loc_y] +type = "i16" +description = "Y-coordinate wafer location of the switch the link is on" + [fields.switch_serial] type = "string" description = "Serial number of the switch being managed" + +[fields.switch_slot] +type = "u16" +description = "Slot number of the switch the link is on" diff --git a/oximeter/oximeter/schema/switch-data-link.toml b/oximeter/oximeter/schema/switch-data-link.toml index d6744e8c7f..dce4841a09 100644 --- a/oximeter/oximeter/schema/switch-data-link.toml +++ b/oximeter/oximeter/schema/switch-data-link.toml @@ -4,8 +4,9 @@ format_version = 1 name = "switch_data_link" description = "A network data link on an Oxide switch" authz_scope = "fleet" + versions = [ - { version = 1, fields = [ "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_model", "switch_revision", "switch_serial" ] }, + { version = 1, fields = [ "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] }, ] [[metrics]] @@ -251,6 +252,26 @@ description = "Serial number of the sled managing the link's switch" type = "uuid" description = "ID of the switch the link is on" +[fields.switch_fab] +type = "string" +description = "Fabrication plant identifier of the switch the link is on" + +[fields.switch_lot] +type = "string" +description = "Lot number of the switch the link is on" + +[fields.switch_wafer] +type = "u8" +description = "Wafer number of the switch the link is on" + +[fields.switch_wafer_loc_x] +type = "i16" +description = "X-coordinate wafer location of the switch the link is on" + +[fields.switch_wafer_loc_y] +type = "i16" +description = "Y-coordinate wafer location of the switch the link is on" + [fields.switch_model] type = "string" description = "The model number switch the link is on" @@ -263,6 +284,10 @@ description = "Revision number of the switch the link is on" type = "string" description = "Serial number of the switch the link is on" +[fields.switch_slot] +type = "u16" +description = "Slot number of the switch the link is on" + [fields.port_id] type = "string" description = "Physical switch port the link is on" diff --git a/package-manifest.toml b/package-manifest.toml index fffa89640d..613948a2e9 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -719,8 +719,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "4067d742d832fa434217b95e4b149048d01ef54e" -source.sha256 = "5e9ccc42e5ac31f4be24025d2afd5978aef33d618f3cb7caa260eff73b7e6a79" +source.commit = "d40c43877c9a449e7b5119b8aef38bd8f10b041f" +source.sha256 = "69fc73a4596777c0c4fbce002ec965094562d170e6c009d6743bc39b6022cea0" output.type = "zone" output.intermediate_only = true @@ -746,8 +746,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "4067d742d832fa434217b95e4b149048d01ef54e" -source.sha256 = "9d3156b7895126b9df5460dd0c34668738a7f2d5894a4be0229644820e732895" +source.commit = "d40c43877c9a449e7b5119b8aef38bd8f10b041f" +source.sha256 = "32eaec41f0adff9dce87693ff21523768afdf9fb10ecd421e8c14d311104ee29" output.type = "zone" output.intermediate_only = true @@ -766,8 +766,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "4067d742d832fa434217b95e4b149048d01ef54e" -source.sha256 = "4eff4f00201ab8373510644693d066dbec2497142d48964be9844f0b30c147e8" +source.commit = "d40c43877c9a449e7b5119b8aef38bd8f10b041f" +source.sha256 = "3832c95ded1ab99604d2977fe629a9492197cc6c8f10db9ef9ea970b48af3dad" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index cf7ca60d05..a088627ee4 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -81,7 +81,7 @@ use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT}; use omicron_common::address::{BOOTSTRAP_ARTIFACT_PORT, COCKROACH_ADMIN_PORT}; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::{ - HostPortConfig, RackNetworkConfig, + HostPortConfig, RackNetworkConfig, SledIdentifiers, }; use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, @@ -373,15 +373,18 @@ fn display_zone_init_errors(errors: &[(String, Box)]) -> String { /// Configuration parameters which modify the [`ServiceManager`]'s behavior. pub struct Config { /// Identifies the sled being configured - pub sled_id: Uuid, + pub sled_identifiers: SledIdentifiers, /// Identifies the revision of the sidecar to be used. pub sidecar_revision: SidecarRevision, } impl Config { - pub fn new(sled_id: Uuid, sidecar_revision: SidecarRevision) -> Self { - Self { sled_id, sidecar_revision } + pub fn new( + sled_identifiers: SledIdentifiers, + sidecar_revision: SidecarRevision, + ) -> Self { + Self { sled_identifiers, sidecar_revision } } } @@ -967,6 +970,7 @@ impl ServiceManager { .get() .expect("sled agent not started") .config + .sled_identifiers .sled_id } @@ -2670,15 +2674,42 @@ impl ServiceManager { if let Some(i) = info { dendrite_config = dendrite_config + .add_property( + "rack_id", + "astring", + &i.rack_id.to_string(), + ) .add_property( "sled_id", "astring", - &i.config.sled_id.to_string(), + &i.config + .sled_identifiers + .sled_id + .to_string(), ) .add_property( - "rack_id", + "sled_model", "astring", - &i.rack_id.to_string(), + &i.config + .sled_identifiers + .model + .to_string(), + ) + .add_property( + "sled_serial", + "astring", + &i.config + .sled_identifiers + .serial + .to_string(), + ) + .add_property( + "sled_revision", + "astring", + &i.config + .sled_identifiers + .revision + .to_string(), ); } @@ -2970,7 +3001,10 @@ impl ServiceManager { .add_property( "sled_uuid", "astring", - &i.config.sled_id.to_string(), + &i.config + .sled_identifiers + .sled_id + .to_string(), ) .add_property( "rack_uuid", @@ -3016,7 +3050,10 @@ impl ServiceManager { .add_property( "sled_uuid", "astring", - &i.config.sled_id.to_string(), + &i.config + .sled_identifiers + .sled_id + .to_string(), ) .add_property( "rack_uuid", @@ -4352,7 +4389,25 @@ impl ServiceManager { )?; smfh.setprop_default_instance( "config/sled_id", - info.config.sled_id, + info.config.sled_identifiers.sled_id, + )?; + smfh.setprop_default_instance( + "config/sled_model", + info.config + .sled_identifiers + .model + .to_string(), + )?; + smfh.setprop_default_instance( + "config/sled_revision", + info.config.sled_identifiers.revision, + )?; + smfh.setprop_default_instance( + "config/sled_serial", + info.config + .sled_identifiers + .serial + .to_string(), )?; } else { info!( @@ -4461,7 +4516,7 @@ impl ServiceManager { )?; smfh.setprop_default_instance( "config/sled_uuid", - info.config.sled_id, + info.config.sled_identifiers.sled_id, )?; } for address in &request.addresses { @@ -4504,7 +4559,7 @@ impl ServiceManager { )?; smfh.setprop_default_instance( "config/sled_uuid", - info.config.sled_id, + info.config.sled_identifiers.sled_id, )?; } smfh.delpropvalue_default_instance( @@ -4945,7 +5000,13 @@ mod illumos_tests { fn make_config(&self) -> Config { Config { - sled_id: Uuid::new_v4(), + sled_identifiers: SledIdentifiers { + rack_id: Uuid::new_v4(), + sled_id: Uuid::new_v4(), + model: "fake-gimlet".to_string(), + revision: 1, + serial: "fake-serial".to_string(), + }, sidecar_revision: SidecarRevision::Physical( "rev_whatever_its_a_test".to_string(), ), diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 3106c3bb38..953136bb95 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -468,7 +468,7 @@ impl SledAgent { serial: baseboard.identifier().to_string(), }; let metrics_manager = - MetricsManager::new(&log, identifiers, *sled_address.ip())?; + MetricsManager::new(&log, identifiers.clone(), *sled_address.ip())?; // Start tracking the underlay physical links. for link in underlay::find_chelsio_links(&config.data_links)? { @@ -514,10 +514,8 @@ impl SledAgent { }; let updates = UpdateManager::new(update_config); - let svc_config = services::Config::new( - request.body.id.into_untyped_uuid(), - config.sidecar_revision.clone(), - ); + let svc_config = + services::Config::new(identifiers, config.sidecar_revision.clone()); // Get our rack network config from the bootstore; we cannot proceed // until we have this, as we need to know which switches have uplinks to diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 09378177d7..d10b0f184d 100755 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="4067d742d832fa434217b95e4b149048d01ef54e" -SHA2="ff41c2a30f67c4ce79fc1c8dd99bce8042e855c51fd15362be4ee833acec58cf" +COMMIT="d40c43877c9a449e7b5119b8aef38bd8f10b041f" +SHA2="bcddcd4d600f5cb9bd73754125b4263312fcabadd2a521d44355633a796c8a71" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 40cf08e5c0..e3018e8c6d 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="5e9ccc42e5ac31f4be24025d2afd5978aef33d618f3cb7caa260eff73b7e6a79" -CIDL_SHA256_LINUX_DPD="5738cb74ea2657dd255f5ba094600265f5b10b99eeb70bd0b69cbb987b951b71" -CIDL_SHA256_LINUX_SWADM="30e085b64e33c374ec79d9e0e8cf015547e573b34525d2da228a8c99755557fb" +CIDL_SHA256_ILLUMOS="69fc73a4596777c0c4fbce002ec965094562d170e6c009d6743bc39b6022cea0" +CIDL_SHA256_LINUX_DPD="12d020eca4cb6f5cbe7509618c8c7d05c9f8a24edeb208f71648aca97ee5e4eb" +CIDL_SHA256_LINUX_SWADM="ef7e4e03c03690210238fc1a144d771d0fe5bf5a5b8dce3ef80d94b0b4c2664d"