Skip to content

Commit

Permalink
Update SMF properties for Dendrite to include sled identifiers (#6793)
Browse files Browse the repository at this point in the history
Includes:
  - package-manifest update to match the properties added to dpd
  - removal of client call to get sled identifier information (no longer
    used)
  - switch_slot added to oximeter schemas for dendrite/switch-data-link

Related:
  - oxidecomputer/dendrite#1031
  • Loading branch information
zeeshanlakhani authored Nov 7, 2024
1 parent c6ac54a commit 520c4d1
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 61 deletions.
30 changes: 0 additions & 30 deletions clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,36 +391,6 @@ impl From<omicron_common::api::internal::shared::NetworkInterfaceKind>
}
}

/// Converts the shared `omicron_common` sled identifiers into the client's
/// `types::SledIdentifiers`, moving each field across unchanged.
impl From<omicron_common::api::internal::shared::SledIdentifiers>
    for types::SledIdentifiers
{
    fn from(
        value: omicron_common::api::internal::shared::SledIdentifiers,
    ) -> Self {
        // Take the source apart by name, then rebuild the target from the
        // same-named pieces.
        let omicron_common::api::internal::shared::SledIdentifiers {
            model,
            rack_id,
            revision,
            serial,
            sled_id,
        } = value;

        Self { model, rack_id, revision, serial, sled_id }
    }
}

/// Converts the client's `types::SledIdentifiers` back into the shared
/// `omicron_common` representation, moving each field across unchanged.
impl From<types::SledIdentifiers>
    for omicron_common::api::internal::shared::SledIdentifiers
{
    fn from(value: types::SledIdentifiers) -> Self {
        // Take the client type apart by name, then rebuild the shared type
        // from the same-named pieces.
        let types::SledIdentifiers {
            model,
            rack_id,
            revision,
            serial,
            sled_id,
        } = value;

        Self { model, rack_id, revision, serial, sled_id }
    }
}

/// Exposes additional [`Client`] interfaces for use by the test suite. These
/// are bonus endpoints, not generated in the real client.
#[async_trait]
Expand Down
25 changes: 25 additions & 0 deletions oximeter/db/schema/replicated/13/timeseries-to-delete.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
switch_data_link:bytes_sent
switch_data_link:bytes_received
switch_data_link:errors_sent
switch_data_link:errors_received
switch_data_link:receive_crc_error_drops
switch_data_link:receive_buffer_full_drops
switch_data_link:packets_sent
switch_data_link:packets_received
switch_data_link:link_up
switch_data_link:link_enabled
switch_data_link:link_fsm
switch_data_link:pcs_bad_sync_headers
switch_data_link:pcs_errored_blocks
switch_data_link:pcs_block_lock_loss
switch_data_link:pcs_high_ber
switch_data_link:pcs_valid_errors
switch_data_link:pcs_invalid_errors
switch_data_link:pcs_unknown_errors
switch_data_link:pcs_sync_loss
switch_data_link:fec_high_symbol_errors
switch_data_link:fec_sync_aligned
switch_data_link:fec_corrected_blocks
switch_data_link:fec_uncorrected_blocks
switch_data_link:fec_symbol_errors
dendrite:sample_collection_duration
25 changes: 25 additions & 0 deletions oximeter/db/schema/single-node/13/timeseries-to-delete.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
switch_data_link:bytes_sent
switch_data_link:bytes_received
switch_data_link:errors_sent
switch_data_link:errors_received
switch_data_link:receive_crc_error_drops
switch_data_link:receive_buffer_full_drops
switch_data_link:packets_sent
switch_data_link:packets_received
switch_data_link:link_up
switch_data_link:link_enabled
switch_data_link:link_fsm
switch_data_link:pcs_bad_sync_headers
switch_data_link:pcs_errored_blocks
switch_data_link:pcs_block_lock_loss
switch_data_link:pcs_high_ber
switch_data_link:pcs_valid_errors
switch_data_link:pcs_invalid_errors
switch_data_link:pcs_unknown_errors
switch_data_link:pcs_sync_loss
switch_data_link:fec_high_symbol_errors
switch_data_link:fec_sync_aligned
switch_data_link:fec_corrected_blocks
switch_data_link:fec_uncorrected_blocks
switch_data_link:fec_symbol_errors
dendrite:sample_collection_duration
26 changes: 25 additions & 1 deletion oximeter/oximeter/schema/dendrite.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ versions = [
# switch for its statistics, which is why these fields are included.
# Dendrite may eventually report statistics about itself, or other aspects
# not related to the switch, so they belong here, not the target.
{ added_in = 1, fields = [ "switch_model", "switch_revision", "switch_id", "switch_serial" ] }
{ added_in = 1, fields = [ "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] }
]

[fields.rack_id]
Expand Down Expand Up @@ -53,6 +53,30 @@ description = "Revision number of the switch being managed"
type = "uuid"
description = "ID of the switch being managed"

[fields.switch_fab]
type = "string"
description = "Fabrication plant identifier of the switch being managed"

[fields.switch_lot]
type = "string"
description = "Lot number of the switch being managed"

[fields.switch_wafer]
type = "u8"
description = "Wafer number of the switch being managed"

[fields.switch_wafer_loc_x]
type = "i16"
description = "X-coordinate wafer location of the switch being managed"

[fields.switch_wafer_loc_y]
type = "i16"
description = "Y-coordinate wafer location of the switch being managed"

[fields.switch_serial]
type = "string"
description = "Serial number of the switch being managed"

[fields.switch_slot]
type = "u16"
description = "Slot number of the switch being managed"
27 changes: 26 additions & 1 deletion oximeter/oximeter/schema/switch-data-link.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ format_version = 1
name = "switch_data_link"
description = "A network data link on an Oxide switch"
authz_scope = "fleet"

versions = [
{ version = 1, fields = [ "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_model", "switch_revision", "switch_serial" ] },
{ version = 1, fields = [ "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] },
]

[[metrics]]
Expand Down Expand Up @@ -251,6 +252,26 @@ description = "Serial number of the sled managing the link's switch"
type = "uuid"
description = "ID of the switch the link is on"

[fields.switch_fab]
type = "string"
description = "Fabrication plant identifier of the switch the link is on"

[fields.switch_lot]
type = "string"
description = "Lot number of the switch the link is on"

[fields.switch_wafer]
type = "u8"
description = "Wafer number of the switch the link is on"

[fields.switch_wafer_loc_x]
type = "i16"
description = "X-coordinate wafer location of the switch the link is on"

[fields.switch_wafer_loc_y]
type = "i16"
description = "Y-coordinate wafer location of the switch the link is on"

[fields.switch_model]
type = "string"
description = "Model number of the switch the link is on"
Expand All @@ -263,6 +284,10 @@ description = "Revision number of the switch the link is on"
type = "string"
description = "Serial number of the switch the link is on"

[fields.switch_slot]
type = "u16"
description = "Slot number of the switch the link is on"

[fields.port_id]
type = "string"
description = "Physical switch port the link is on"
Expand Down
12 changes: 6 additions & 6 deletions package-manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -719,8 +719,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "4067d742d832fa434217b95e4b149048d01ef54e"
source.sha256 = "5e9ccc42e5ac31f4be24025d2afd5978aef33d618f3cb7caa260eff73b7e6a79"
source.commit = "d40c43877c9a449e7b5119b8aef38bd8f10b041f"
source.sha256 = "69fc73a4596777c0c4fbce002ec965094562d170e6c009d6743bc39b6022cea0"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -746,8 +746,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "4067d742d832fa434217b95e4b149048d01ef54e"
source.sha256 = "9d3156b7895126b9df5460dd0c34668738a7f2d5894a4be0229644820e732895"
source.commit = "d40c43877c9a449e7b5119b8aef38bd8f10b041f"
source.sha256 = "32eaec41f0adff9dce87693ff21523768afdf9fb10ecd421e8c14d311104ee29"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -766,8 +766,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "4067d742d832fa434217b95e4b149048d01ef54e"
source.sha256 = "4eff4f00201ab8373510644693d066dbec2497142d48964be9844f0b30c147e8"
source.commit = "d40c43877c9a449e7b5119b8aef38bd8f10b041f"
source.sha256 = "3832c95ded1ab99604d2977fe629a9492197cc6c8f10db9ef9ea970b48af3dad"
output.type = "zone"
output.intermediate_only = true

Expand Down
87 changes: 74 additions & 13 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT};
use omicron_common::address::{BOOTSTRAP_ARTIFACT_PORT, COCKROACH_ADMIN_PORT};
use omicron_common::api::external::Generation;
use omicron_common::api::internal::shared::{
HostPortConfig, RackNetworkConfig,
HostPortConfig, RackNetworkConfig, SledIdentifiers,
};
use omicron_common::backoff::{
retry_notify, retry_policy_internal_service_aggressive, BackoffError,
Expand Down Expand Up @@ -373,15 +373,18 @@ fn display_zone_init_errors(errors: &[(String, Box<Error>)]) -> String {
/// Configuration parameters which modify the [`ServiceManager`]'s behavior.
pub struct Config {
/// Identifies the sled being configured
pub sled_id: Uuid,
pub sled_identifiers: SledIdentifiers,

/// Identifies the revision of the sidecar to be used.
pub sidecar_revision: SidecarRevision,
}

impl Config {
pub fn new(sled_id: Uuid, sidecar_revision: SidecarRevision) -> Self {
Self { sled_id, sidecar_revision }
pub fn new(
sled_identifiers: SledIdentifiers,
sidecar_revision: SidecarRevision,
) -> Self {
Self { sled_identifiers, sidecar_revision }
}
}

Expand Down Expand Up @@ -967,6 +970,7 @@ impl ServiceManager {
.get()
.expect("sled agent not started")
.config
.sled_identifiers
.sled_id
}

Expand Down Expand Up @@ -2670,15 +2674,42 @@ impl ServiceManager {

if let Some(i) = info {
dendrite_config = dendrite_config
.add_property(
"rack_id",
"astring",
&i.rack_id.to_string(),
)
.add_property(
"sled_id",
"astring",
&i.config.sled_id.to_string(),
&i.config
.sled_identifiers
.sled_id
.to_string(),
)
.add_property(
"rack_id",
"sled_model",
"astring",
&i.rack_id.to_string(),
&i.config
.sled_identifiers
.model
.to_string(),
)
.add_property(
"sled_serial",
"astring",
&i.config
.sled_identifiers
.serial
.to_string(),
)
.add_property(
"sled_revision",
"astring",
&i.config
.sled_identifiers
.revision
.to_string(),
);
}

Expand Down Expand Up @@ -2970,7 +3001,10 @@ impl ServiceManager {
.add_property(
"sled_uuid",
"astring",
&i.config.sled_id.to_string(),
&i.config
.sled_identifiers
.sled_id
.to_string(),
)
.add_property(
"rack_uuid",
Expand Down Expand Up @@ -3016,7 +3050,10 @@ impl ServiceManager {
.add_property(
"sled_uuid",
"astring",
&i.config.sled_id.to_string(),
&i.config
.sled_identifiers
.sled_id
.to_string(),
)
.add_property(
"rack_uuid",
Expand Down Expand Up @@ -4352,7 +4389,25 @@ impl ServiceManager {
)?;
smfh.setprop_default_instance(
"config/sled_id",
info.config.sled_id,
info.config.sled_identifiers.sled_id,
)?;
smfh.setprop_default_instance(
"config/sled_model",
info.config
.sled_identifiers
.model
.to_string(),
)?;
smfh.setprop_default_instance(
"config/sled_revision",
info.config.sled_identifiers.revision,
)?;
smfh.setprop_default_instance(
"config/sled_serial",
info.config
.sled_identifiers
.serial
.to_string(),
)?;
} else {
info!(
Expand Down Expand Up @@ -4461,7 +4516,7 @@ impl ServiceManager {
)?;
smfh.setprop_default_instance(
"config/sled_uuid",
info.config.sled_id,
info.config.sled_identifiers.sled_id,
)?;
}
for address in &request.addresses {
Expand Down Expand Up @@ -4504,7 +4559,7 @@ impl ServiceManager {
)?;
smfh.setprop_default_instance(
"config/sled_uuid",
info.config.sled_id,
info.config.sled_identifiers.sled_id,
)?;
}
smfh.delpropvalue_default_instance(
Expand Down Expand Up @@ -4945,7 +5000,13 @@ mod illumos_tests {

fn make_config(&self) -> Config {
Config {
sled_id: Uuid::new_v4(),
sled_identifiers: SledIdentifiers {
rack_id: Uuid::new_v4(),
sled_id: Uuid::new_v4(),
model: "fake-gimlet".to_string(),
revision: 1,
serial: "fake-serial".to_string(),
},
sidecar_revision: SidecarRevision::Physical(
"rev_whatever_its_a_test".to_string(),
),
Expand Down
Loading

0 comments on commit 520c4d1

Please sign in to comment.