From ab6460c3be0f72dae1b85df112ab0dc649e28938 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 14 Nov 2023 19:52:14 +0000 Subject: [PATCH] WIP --- common/src/api/internal/nexus.rs | 21 +++++++++++++++++++ nexus/db-model/src/producer_endpoint.rs | 27 +++++++++++++++++++++++++ nexus/db-model/src/schema.rs | 1 + nexus/src/app/oximeter.rs | 1 + nexus/test-utils/src/lib.rs | 2 ++ oximeter/collector/src/agent.rs | 4 ++++ oximeter/producer/examples/producer.rs | 2 ++ schema/crdb/11.0.0/up01.sql | 23 +++++++++++++++++++++ schema/crdb/11.0.0/up02.sql | 11 ++++++++++ schema/crdb/11.0.0/up03.sql | 17 ++++++++++++++++ schema/crdb/11.0.0/up04.sql | 8 ++++++++ schema/crdb/dbinit.sql | 13 ++++++++++++ sled-agent/src/sim/disk.rs | 2 ++ sled-agent/src/sled_agent.rs | 2 ++ 14 files changed, 134 insertions(+) create mode 100644 schema/crdb/11.0.0/up01.sql create mode 100644 schema/crdb/11.0.0/up02.sql create mode 100644 schema/crdb/11.0.0/up03.sql create mode 100644 schema/crdb/11.0.0/up04.sql diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index a4a539ad9b1..c3b852aa1f3 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -84,13 +84,34 @@ pub struct SledInstanceState { // Oximeter producer/collector objects. +/// The _kind_ of metric producer this is. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ProducerKind { + /// The producer is a sled-agent. + SledAgent, + /// The producer is an Oxide-managed service. + Service, + /// The producer is a Propolis VMM managing a guest instance. + Instance, +} + /// Information announced by a metric server, used so that clients can contact it and collect /// available metric data from it. #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)] pub struct ProducerEndpoint { + /// A unique ID for this producer. pub id: Uuid, + /// The kind of producer. + pub kind: ProducerKind, + /// The IP address and port at which `oximeter` can collect metrics from the + /// producer. pub address: SocketAddr, + /// The API base route from which `oximeter` can collect metrics. + /// + /// The full route is `{base_route}/{id}`. pub base_route: String, + /// The interval on which `oximeter` should collect metrics. pub interval: Duration, } diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 29e57b0877e..04aaa15dfb1 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -8,6 +8,31 @@ use db_macros::Asset; use nexus_types::identity::Asset; use omicron_common::api::internal; use uuid::Uuid; +use crate::impl_enum_type; + +impl_enum_type!( + #[derive(SqlType, Clone, Debug, QueryId)] + #[diesel(postgres_type(name = "producer_kind"))] + pub struct ProducerKindEnum; + + #[derive(AsExpression, Clone, Debug, FromSqlRow, PartialEq)] + #[diesel(sql_type = ProducerKindEnum)] + pub enum ProducerKind; + + SledAgent => b"sled_agent" + Service => b"service" + Instance => b"instance" +); + +impl From for ProducerKind { + fn from(kind: internal::nexus::ProducerKind) -> Self { + match kind { + internal::nexus::ProducerKind::SledAgent => ProducerKind::SledAgent, + internal::nexus::ProducerKind::Service => ProducerKind::Service, + internal::nexus::ProducerKind::Instance => ProducerKind::Instance, + } + } +} /// Information announced by a metric server, used so that clients can contact it and collect /// available metric data from it. @@ -17,6 +42,7 @@ pub struct ProducerEndpoint { #[diesel(embed)] identity: ProducerEndpointIdentity, + pub kind: ProducerKind, pub ip: ipnetwork::IpNetwork, pub port: SqlU16, pub interval: f64, @@ -33,6 +59,7 @@ impl ProducerEndpoint { ) -> Self { Self { identity: ProducerEndpointIdentity::new(endpoint.id), + kind: endpoint.kind.into(), ip: endpoint.address.ip().into(), port: endpoint.address.port().into(), base_route: endpoint.base_route.clone(), diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 7c6b8bbd0a2..e2c14816704 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -399,6 +399,7 @@ table! { id -> Uuid, time_created -> Timestamptz, time_modified -> Timestamptz, + kind -> crate::ProducerKindEnum, ip -> Inet, port -> Int4, interval -> Float8, diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 03f833b087e..a1b21db8350 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -139,6 +139,7 @@ impl super::Nexus { pub(crate) async fn register_as_producer(&self, address: SocketAddr) { let producer_endpoint = nexus::ProducerEndpoint { id: self.id, + kind: nexus::ProducerKind::Service, address, base_route: String::from("/metrics/collect"), interval: Duration::from_secs(10), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 647232031d8..52ff8910f91 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -30,6 +30,7 @@ use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; use omicron_common::api::external::MacAddr; use omicron_common::api::external::{IdentityMetadata, Name}; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::nexus_config; use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; @@ -1092,6 +1093,7 @@ pub async fn start_producer_server( let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0); let server_info = ProducerEndpoint { id, + kind: ProducerKind::Service, address: producer_address, base_route: "/collect".to_string(), interval: Duration::from_secs(1), diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 23ff32ed668..ab6d4abfc99 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -640,6 +640,7 @@ mod tests { use super::CollectionMessage; use super::OximeterAgent; use super::ProducerEndpoint; + use omicron_common::api::internal::nexus::ProducerKind; use crate::self_stats::FailureReason; use hyper::service::make_service_fn; use hyper::service::service_fn; @@ -694,6 +695,7 @@ mod tests { let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), + kind: ProducerKind::Service, address, base_route: String::from("/"), interval, @@ -752,6 +754,7 @@ mod tests { let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), + kind: ProducerKind::Service, address: SocketAddr::V6(SocketAddrV6::new( Ipv6Addr::LOCALHOST, 0, @@ -840,6 +843,7 @@ mod tests { let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), + kind: ProducerKind::Service, address, base_route: String::from("/"), interval, diff --git a/oximeter/producer/examples/producer.rs b/oximeter/producer/examples/producer.rs index dd9722c80a9..8dbe0b6ad97 100644 --- a/oximeter/producer/examples/producer.rs +++ b/oximeter/producer/examples/producer.rs @@ -15,6 +15,7 @@ use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use dropshot::HandlerTaskMode; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::ProducerKind; use oximeter::types::Cumulative; use oximeter::types::ProducerRegistry; use oximeter::types::Sample; @@ -124,6 +125,7 @@ async fn main() -> anyhow::Result<()> { registry.register_producer(producer).unwrap(); let server_info = ProducerEndpoint { id: registry.producer_id(), + kind: ProducerKind::Service, address: args.address, base_route: "/collect".to_string(), interval: Duration::from_secs(10), diff --git a/schema/crdb/11.0.0/up01.sql b/schema/crdb/11.0.0/up01.sql new file mode 100644 index 00000000000..acc666a98fc --- /dev/null +++ b/schema/crdb/11.0.0/up01.sql @@ -0,0 +1,23 @@ +/* + * Drop the entire metric producer assignment table. + * + * Programs wishing to produce metrics need to register with Nexus. That creates + * an assignment of the producer to a collector, which is recorded in this + * table. That registration is idempotent, and every _current_ producer will + * register when it restarts. For example, `dpd` includes a task that registers + * with Nexus, so each time it (re)starts, that registration will happen. + * + * With that in mind, dropping this table is safe, _because all updates are + * currently offline_. The current metric producers are: + * + * - `dpd` + * - Each `nexus` instance + * - Each `sled-agent` instance + * - The Propolis server for each guest Instance + * + * Each of this either does not exist at the time of an update, or will be + * restarted afterwards. Each will re-register, and so dropping this table one + * time is safe. It will be recreated in later schema upgrade files in this same + * update. + */ +DROP TABLE IF EXISTS omicron.public.metric_producer; diff --git a/schema/crdb/11.0.0/up02.sql b/schema/crdb/11.0.0/up02.sql new file mode 100644 index 00000000000..96c4c5d6b47 --- /dev/null +++ b/schema/crdb/11.0.0/up02.sql @@ -0,0 +1,11 @@ +/* + * The kind of metric producer each record corresponds to. + */ +CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM ( + -- A sled agent for an entry in the sled table. + 'sled_agent', + -- A service in the omicron.public.service table + 'service', + -- A Propolis VMM for an instance in the omicron.public.instance table + 'instance' +); diff --git a/schema/crdb/11.0.0/up03.sql b/schema/crdb/11.0.0/up03.sql new file mode 100644 index 00000000000..e5a3fceb984 --- /dev/null +++ b/schema/crdb/11.0.0/up03.sql @@ -0,0 +1,17 @@ +/* + * Recreate the metric producer assignment table. + * + * Note that we're adding the `kind` column here, using the new enum in the + * previous update SQL file. + */ +CREATE TABLE IF NOT EXISTS omicron.public.metric_producer ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + kind omicron.public.producer_kind NOT NULL, + ip INET NOT NULL, + port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + interval FLOAT NOT NULL, + base_route STRING(512) NOT NULL, + oximeter_id UUID NOT NULL +); diff --git a/schema/crdb/11.0.0/up04.sql b/schema/crdb/11.0.0/up04.sql new file mode 100644 index 00000000000..cad33ddcf2d --- /dev/null +++ b/schema/crdb/11.0.0/up04.sql @@ -0,0 +1,8 @@ +/* + * Recreate index to support looking up a producer by its assigned oximeter + * collector ID. + */ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_producer_by_oximeter ON omicron.public.metric_producer ( + oximeter_id, + id +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 875877ee967..fbf9b670b1f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1108,6 +1108,18 @@ CREATE TABLE IF NOT EXISTS omicron.public.oximeter ( port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL ); +/* + * The kind of metric producer each record corresponds to. + */ +CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM ( + -- A sled agent for an entry in the sled table. + 'sled_agent', + -- A service in the omicron.public.service table + 'service', + -- A Propolis VMM for an instance in the omicron.public.instance table + 'instance' +); + /* * Information about registered metric producers. */ @@ -1115,6 +1127,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.metric_producer ( id UUID PRIMARY KEY, time_created TIMESTAMPTZ NOT NULL, time_modified TIMESTAMPTZ NOT NULL, + kind omicron.public.producer_kind NOT NULL, ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, interval FLOAT NOT NULL, diff --git a/sled-agent/src/sim/disk.rs b/sled-agent/src/sim/disk.rs index 2d2c18be258..2b648267790 100644 --- a/sled-agent/src/sim/disk.rs +++ b/sled-agent/src/sim/disk.rs @@ -17,6 +17,7 @@ use omicron_common::api::external::Generation; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::ProducerKind; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; use propolis_client::api::DiskAttachmentState as PropolisDiskState; @@ -168,6 +169,7 @@ impl SimDisk { let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0); let server_info = ProducerEndpoint { id, + kind: ProducerKind::SledAgent, address: producer_address, base_route: "/collect".to_string(), interval: Duration::from_millis(200), diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 906993ad00a..7f1ff96723e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -42,6 +42,7 @@ use omicron_common::address::{ }; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::nexus::{ SledInstanceState, VmmRuntimeState, }; @@ -504,6 +505,7 @@ impl SledAgent { // Nexus. This should not block progress here. let endpoint = ProducerEndpoint { id: request.body.id, + kind: ProducerKind::SledAgent, address: sled_address.into(), base_route: String::from("/metrics/collect"), interval: crate::metrics::METRIC_COLLECTION_INTERVAL,