From 170ef36b9ddcbf439d3a82d675bdeea0ecd67d1d Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 16 Jul 2024 21:33:07 +0000 Subject: [PATCH 1/3] Define switch data link timeseries in TOML Note that this renames and reorganizes the timeseries a good deal. We want to include more of the switch / sled identifiers, and make the name of the timeseries more consistent with the existing sled data link and planned instance data link timeseries. --- .../oximeter/schema/switch-data-link.toml | 222 ++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 oximeter/oximeter/schema/switch-data-link.toml diff --git a/oximeter/oximeter/schema/switch-data-link.toml b/oximeter/oximeter/schema/switch-data-link.toml new file mode 100644 index 0000000000..81b9a47e64 --- /dev/null +++ b/oximeter/oximeter/schema/switch-data-link.toml @@ -0,0 +1,222 @@ +format_version = 1 + +[target] +name = "switch_data_link" +description = "A network data link on an Oxide switch" +authz_scope = "fleet" +versions = [ + { version = 1, fields = [ "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_model", "switch_revision", "switch_serial" ] }, +] + +[[metrics]] +name = "bytes_sent" +description = "Total number of bytes sent on the data link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "bytes_received" +description = "Total number of bytes received on the data link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "errors_sent" +description = "Total number of errors when sending on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "errors_received" +description = "Total number of packets for the data link dropped due to any error" +units = "count" 
+datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "receive_crc_error_drops" +description = "Total number of packets for the data link dropped due to CRC errors" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "receive_buffer_full_drops" +description = "Total number of packets for the data link dropped due to ASIC buffer congestion" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "packets_sent" +description = "Total number of packets sent on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "packets_received" +description = "Total number of packets received on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "packets_received" +description = "Total number of packets received on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "link_up" +description = "Reports whether the link is currently up" +units = "none" +datum_type = "bool" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "link_fsm" +description = """\ +Total entries into each state of the autonegotiation / \ +link-training finite state machine\ +""" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id", "state" ] } +] + +[[metrics]] +name = "pcs_bad_sync_headers" +description = "Total number of bad PCS sync headers on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id",
"link_id" ] } +] + +[[metrics]] +name = "pcs_errored_blocks" +description = "Total number of PCS error blocks on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_block_lock_loss" +description = "Total number of detected losses of block-lock on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_high_ber" +description = "Total number of high bit-error-rate events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_valid_errors" +description = "Total number of valid error events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_invalid_errors" +description = "Total number of invalid error events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_unknown_errors" +description = "Total number of unknown error events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[fields.rack_id] +type = "uuid" +description = "ID of the rack the link's switch is in" + +[fields.sled_id] +type = "uuid" +description = "ID of the sled managing the link's switch" + +[fields.sled_model] +type = "string" +description = "Model number of the sled managing the link's switch" + +[fields.sled_revision] +type = "u32" +description = "Revision number of the sled managing the link's switch" + +[fields.sled_serial] +type = "string" +description = "Serial number of the sled managing the link's switch" + +[fields.switch_id] +type = "uuid" +description = "ID of 
the switch the link is on" + +[fields.switch_model] +type = "string" +description = "Model number of the switch the link is on" + +[fields.switch_revision] +type = "u32" +description = "Revision number of the switch the link is on" + +[fields.switch_serial] +type = "string" +description = "Serial number of the switch the link is on" + +[fields.port_id] +type = "string" +description = "Physical switch port the link is on" + +[fields.link_id] +type = "u8" +description = "ID of the link within its switch port" + +[fields.state] +type = "string" +description = "Name of the data link FSM state" From 7ebafcefa34c006085b55d1f45b85354fa1fa93c Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Thu, 18 Jul 2024 21:37:11 +0000 Subject: [PATCH 2/3] Add Units::None, remove duplicated timeseries --- oximeter/impl/src/schema/codegen.rs | 1 + oximeter/impl/src/schema/mod.rs | 2 ++ oximeter/oximeter/schema/switch-data-link.toml | 9 --------- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/oximeter/impl/src/schema/codegen.rs b/oximeter/impl/src/schema/codegen.rs index cde67439de..ef686c3cdd 100644 --- a/oximeter/impl/src/schema/codegen.rs +++ b/oximeter/impl/src/schema/codegen.rs @@ -515,6 +515,7 @@ fn quote_creation_time(created: DateTime) -> TokenStream { impl quote::ToTokens for Units { fn to_tokens(&self, tokens: &mut TokenStream) { let toks = match self { + Units::None => quote! { ::oximeter::schema::Units::None }, Units::Count => quote! { ::oximeter::schema::Units::Count }, Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, diff --git a/oximeter/impl/src/schema/mod.rs b/oximeter/impl/src/schema/mod.rs index 83a83e95b2..7743034e31 100644 --- a/oximeter/impl/src/schema/mod.rs +++ b/oximeter/impl/src/schema/mod.rs @@ -183,6 +183,8 @@ pub struct TimeseriesDescription { // TODO-completeness: Decide whether and how to handle dimensional analysis // during queries, if needed.
pub enum Units { + /// No meaningful units, e.g. a dimensionless quantity. + None, Count, Bytes, Seconds, diff --git a/oximeter/oximeter/schema/switch-data-link.toml b/oximeter/oximeter/schema/switch-data-link.toml index 81b9a47e64..fa10759ca9 100644 --- a/oximeter/oximeter/schema/switch-data-link.toml +++ b/oximeter/oximeter/schema/switch-data-link.toml @@ -80,15 +80,6 @@ versions = [ { added_in = 1, fields = [ "port_id", "link_id" ] } ] -[[metrics]] -name = "packets_received" -description = "Total number of packets received on the data link" -units = "count" -datum_type = "cumulative_u64" -versions = [ - { added_in = 1, fields = [ "port_id", "link_id" ] } -] - [[metrics]] name = "link_up" description = "Reports whether the link is currently up" From ea8e04155df20d01a64c93f92af7609261fe0506 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 19 Jul 2024 16:30:25 +0000 Subject: [PATCH 3/3] update openapi spec --- openapi/nexus.json | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/openapi/nexus.json b/openapi/nexus.json index 9e46a92039..a4baa8124f 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -19797,12 +19797,23 @@ }, "Units": { "description": "Measurement units for timeseries samples.", - "type": "string", - "enum": [ - "count", - "bytes", - "seconds", - "nanoseconds" + "oneOf": [ + { + "type": "string", + "enum": [ + "count", + "bytes", + "seconds", + "nanoseconds" + ] + }, + { + "description": "No meaningful units, e.g. a dimensionless quantity.", + "type": "string", + "enum": [ + "none" + ] + } ] }, "User": {