Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
bnaecker committed Nov 14, 2023
1 parent daf4c34 commit ab6460c
Show file tree
Hide file tree
Showing 14 changed files with 134 additions and 0 deletions.
21 changes: 21 additions & 0 deletions common/src/api/internal/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,34 @@ pub struct SledInstanceState {

// Oximeter producer/collector objects.

// NOTE: the serde `snake_case` renaming below makes these variants serialize as
// `sled_agent`, `service`, and `instance`. Those are the same labels used by
// the `producer_kind` database enum, so a new variant here also needs a
// matching database label and db-model mapping.
/// The _kind_ of metric producer this is.
#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ProducerKind {
/// The producer is a sled-agent.
SledAgent,
/// The producer is an Oxide-managed service.
Service,
/// The producer is a Propolis VMM managing a guest instance.
Instance,
}

/// Information announced by a metric server, used so that clients can contact it and collect
/// available metric data from it.
// NOTE(review): the Nexus db-model `ProducerEndpoint` appears to be built from
// this type; there `address` is split into separate `ip` and `port` fields and
// `kind` is converted with `.into()`. Confirm before changing field types.
#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)]
pub struct ProducerEndpoint {
/// A unique ID for this producer.
pub id: Uuid,
/// The kind of producer.
// One of the `ProducerKind` variants: sled-agent, service, or instance.
pub kind: ProducerKind,
/// The IP address and port at which `oximeter` can collect metrics from the
/// producer.
pub address: SocketAddr,
/// The API base route from which `oximeter` can collect metrics.
///
/// The full route is `{base_route}/{id}`.
pub base_route: String,
/// The interval on which `oximeter` should collect metrics.
pub interval: Duration,
}

Expand Down
27 changes: 27 additions & 0 deletions nexus/db-model/src/producer_endpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,31 @@ use db_macros::Asset;
use nexus_types::identity::Asset;
use omicron_common::api::internal;
use uuid::Uuid;
use crate::impl_enum_type;

// Database representation of the kind of a metric producer.
//
// `ProducerKindEnum` names the SQL type (`producer_kind`) and `ProducerKind`
// is the Rust enum stored in that column. The byte strings in the mapping
// below are the labels of the SQL enum (`sled_agent`, `service`, `instance`)
// and must be kept identical to the labels in the `CREATE TYPE` statement in
// the database schema.
impl_enum_type!(
#[derive(SqlType, Clone, Debug, QueryId)]
#[diesel(postgres_type(name = "producer_kind"))]
pub struct ProducerKindEnum;

#[derive(AsExpression, Clone, Debug, FromSqlRow, PartialEq)]
#[diesel(sql_type = ProducerKindEnum)]
pub enum ProducerKind;

// Rust variant => SQL enum label
SledAgent => b"sled_agent"
Service => b"service"
Instance => b"instance"
);

/// Maps the internal-API producer kind onto its database-model counterpart.
///
/// The two enums have the same three variants, so the conversion is a
/// one-to-one, infallible mapping.
impl From<internal::nexus::ProducerKind> for ProducerKind {
    fn from(kind: internal::nexus::ProducerKind) -> Self {
        // Local alias keeps the match arms short; the match stays exhaustive
        // so adding a variant to the API enum is a compile error here.
        type ApiKind = internal::nexus::ProducerKind;
        match kind {
            ApiKind::SledAgent => Self::SledAgent,
            ApiKind::Service => Self::Service,
            ApiKind::Instance => Self::Instance,
        }
    }
}

/// Information announced by a metric server, used so that clients can contact it and collect
/// available metric data from it.
Expand All @@ -17,6 +42,7 @@ pub struct ProducerEndpoint {
#[diesel(embed)]
identity: ProducerEndpointIdentity,

pub kind: ProducerKind,
pub ip: ipnetwork::IpNetwork,
pub port: SqlU16,
pub interval: f64,
Expand All @@ -33,6 +59,7 @@ impl ProducerEndpoint {
) -> Self {
Self {
identity: ProducerEndpointIdentity::new(endpoint.id),
kind: endpoint.kind.into(),
ip: endpoint.address.ip().into(),
port: endpoint.address.port().into(),
base_route: endpoint.base_route.clone(),
Expand Down
1 change: 1 addition & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ table! {
id -> Uuid,
time_created -> Timestamptz,
time_modified -> Timestamptz,
kind -> crate::ProducerKindEnum,
ip -> Inet,
port -> Int4,
interval -> Float8,
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/oximeter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ impl super::Nexus {
pub(crate) async fn register_as_producer(&self, address: SocketAddr) {
let producer_endpoint = nexus::ProducerEndpoint {
id: self.id,
kind: nexus::ProducerKind::Service,
address,
base_route: String::from("/metrics/collect"),
interval: Duration::from_secs(10),
Expand Down
2 changes: 2 additions & 0 deletions nexus/test-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET;
use omicron_common::api::external::MacAddr;
use omicron_common::api::external::{IdentityMetadata, Name};
use omicron_common::api::internal::nexus::ProducerEndpoint;
use omicron_common::api::internal::nexus::ProducerKind;
use omicron_common::api::internal::shared::SwitchLocation;
use omicron_common::nexus_config;
use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES;
Expand Down Expand Up @@ -1092,6 +1093,7 @@ pub async fn start_producer_server(
let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0);
let server_info = ProducerEndpoint {
id,
kind: ProducerKind::Service,
address: producer_address,
base_route: "/collect".to_string(),
interval: Duration::from_secs(1),
Expand Down
4 changes: 4 additions & 0 deletions oximeter/collector/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ mod tests {
use super::CollectionMessage;
use super::OximeterAgent;
use super::ProducerEndpoint;
use omicron_common::api::internal::nexus::ProducerKind;
use crate::self_stats::FailureReason;
use hyper::service::make_service_fn;
use hyper::service::service_fn;
Expand Down Expand Up @@ -694,6 +695,7 @@ mod tests {
let interval = Duration::from_secs(1);
let endpoint = ProducerEndpoint {
id: Uuid::new_v4(),
kind: ProducerKind::Service,
address,
base_route: String::from("/"),
interval,
Expand Down Expand Up @@ -752,6 +754,7 @@ mod tests {
let interval = Duration::from_secs(1);
let endpoint = ProducerEndpoint {
id: Uuid::new_v4(),
kind: ProducerKind::Service,
address: SocketAddr::V6(SocketAddrV6::new(
Ipv6Addr::LOCALHOST,
0,
Expand Down Expand Up @@ -840,6 +843,7 @@ mod tests {
let interval = Duration::from_secs(1);
let endpoint = ProducerEndpoint {
id: Uuid::new_v4(),
kind: ProducerKind::Service,
address,
base_route: String::from("/"),
interval,
Expand Down
2 changes: 2 additions & 0 deletions oximeter/producer/examples/producer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use dropshot::ConfigLogging;
use dropshot::ConfigLoggingLevel;
use dropshot::HandlerTaskMode;
use omicron_common::api::internal::nexus::ProducerEndpoint;
use omicron_common::api::internal::nexus::ProducerKind;
use oximeter::types::Cumulative;
use oximeter::types::ProducerRegistry;
use oximeter::types::Sample;
Expand Down Expand Up @@ -124,6 +125,7 @@ async fn main() -> anyhow::Result<()> {
registry.register_producer(producer).unwrap();
let server_info = ProducerEndpoint {
id: registry.producer_id(),
kind: ProducerKind::Service,
address: args.address,
base_route: "/collect".to_string(),
interval: Duration::from_secs(10),
Expand Down
23 changes: 23 additions & 0 deletions schema/crdb/11.0.0/up01.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Drop the entire metric producer assignment table.
*
* Programs wishing to produce metrics need to register with Nexus. That creates
* an assignment of the producer to a collector, which is recorded in this
* table. That registration is idempotent, and every _current_ producer will
* register when it restarts. For example, `dpd` includes a task that registers
* with Nexus, so each time it (re)starts, that registration will happen.
*
* With that in mind, dropping this table is safe, _because all updates are
* currently offline_. The current metric producers are:
*
* - `dpd`
* - Each `nexus` instance
* - Each `sled-agent` instance
* - The Propolis server for each guest Instance
*
* Each of these either does not exist at the time of an update, or will be
* restarted afterwards. Each will re-register, and so dropping this table one
* time is safe. It will be recreated in later schema upgrade files in this same
* update.
*/
DROP TABLE IF EXISTS omicron.public.metric_producer;
11 changes: 11 additions & 0 deletions schema/crdb/11.0.0/up02.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
* The kind of metric producer each record corresponds to.
*
* These labels are the same strings that the Nexus database model maps its
* `ProducerKind` variants to (`sled_agent`, `service`, `instance`), so the two
* lists must be kept in sync.
*/
CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM (
-- A sled agent for an entry in the sled table.
'sled_agent',
-- A service in the omicron.public.service table.
'service',
-- A Propolis VMM for an instance in the omicron.public.instance table.
'instance'
);
17 changes: 17 additions & 0 deletions schema/crdb/11.0.0/up03.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
* Recreate the metric producer assignment table.
*
* Note that we're adding the `kind` column here, using the new enum in the
* previous update SQL file.
*/
CREATE TABLE IF NOT EXISTS omicron.public.metric_producer (
-- Unique ID of the producer.
id UUID PRIMARY KEY,
time_created TIMESTAMPTZ NOT NULL,
time_modified TIMESTAMPTZ NOT NULL,
-- What kind of component the producer is (sled agent, service, or instance).
kind omicron.public.producer_kind NOT NULL,
-- IP address and port at which metrics can be collected from the producer.
ip INET NOT NULL,
port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL,
-- Collection interval. NOTE(review): presumably in seconds; confirm against
-- the conversion in the Nexus database model.
interval FLOAT NOT NULL,
-- API base route from which metrics are collected.
base_route STRING(512) NOT NULL,
-- ID of the oximeter collector this producer is assigned to.
oximeter_id UUID NOT NULL
);
8 changes: 8 additions & 0 deletions schema/crdb/11.0.0/up04.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
* Recreate index to support looking up a producer by its assigned oximeter
* collector ID.
*
* The key is (oximeter_id, id). Because `id` is the table's primary key, every
* (oximeter_id, id) pair is already distinct, so the UNIQUE constraint does not
* reject any row that the primary key would accept.
*/
CREATE UNIQUE INDEX IF NOT EXISTS lookup_producer_by_oximeter ON omicron.public.metric_producer (
oximeter_id,
id
);
13 changes: 13 additions & 0 deletions schema/crdb/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1108,13 +1108,26 @@ CREATE TABLE IF NOT EXISTS omicron.public.oximeter (
port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL
);

/*
* The kind of metric producer each record corresponds to.
*
* The same type is created by the 11.0.0 schema migration (up02.sql); keep the
* labels here identical to that file and to the `ProducerKind` mapping in the
* Nexus database model.
*/
CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM (
-- A sled agent for an entry in the sled table.
'sled_agent',
-- A service in the omicron.public.service table.
'service',
-- A Propolis VMM for an instance in the omicron.public.instance table.
'instance'
);

/*
* Information about registered metric producers.
*/
CREATE TABLE IF NOT EXISTS omicron.public.metric_producer (
id UUID PRIMARY KEY,
time_created TIMESTAMPTZ NOT NULL,
time_modified TIMESTAMPTZ NOT NULL,
kind omicron.public.producer_kind NOT NULL,
ip INET NOT NULL,
port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL,
interval FLOAT NOT NULL,
Expand Down
2 changes: 2 additions & 0 deletions sled-agent/src/sim/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use omicron_common::api::external::Generation;
use omicron_common::api::external::ResourceType;
use omicron_common::api::internal::nexus::DiskRuntimeState;
use omicron_common::api::internal::nexus::ProducerEndpoint;
use omicron_common::api::internal::nexus::ProducerKind;
use oximeter_producer::LogConfig;
use oximeter_producer::Server as ProducerServer;
use propolis_client::api::DiskAttachmentState as PropolisDiskState;
Expand Down Expand Up @@ -168,6 +169,7 @@ impl SimDisk {
let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0);
let server_info = ProducerEndpoint {
id,
kind: ProducerKind::SledAgent,
address: producer_address,
base_route: "/collect".to_string(),
interval: Duration::from_millis(200),
Expand Down
2 changes: 2 additions & 0 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use omicron_common::address::{
};
use omicron_common::api::external::Vni;
use omicron_common::api::internal::nexus::ProducerEndpoint;
use omicron_common::api::internal::nexus::ProducerKind;
use omicron_common::api::internal::nexus::{
SledInstanceState, VmmRuntimeState,
};
Expand Down Expand Up @@ -504,6 +505,7 @@ impl SledAgent {
// Nexus. This should not block progress here.
let endpoint = ProducerEndpoint {
id: request.body.id,
kind: ProducerKind::SledAgent,
address: sled_address.into(),
base_route: String::from("/metrics/collect"),
interval: crate::metrics::METRIC_COLLECTION_INTERVAL,
Expand Down

0 comments on commit ab6460c

Please sign in to comment.